Package maintenance

OHDSI · Dec 20, 2022 · 334acd3 · 334acd3
1 parent 33c31b3
commit 334acd3
Show file tree

Hide file tree

Showing 62 changed files with 925 additions and 757 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: CohortDiagnostics
 Type: Package
 Title: Diagnostics for OHDSI Cohorts
-Version: 3.1.1
-Date: 2022-07-20
+Version: 3.1.2
+Date: 2022-12-19
 Authors@R: c(
     person("Gowtham", "Rao", email = "[email protected]", role = c("aut", "cre")),
     person("Martijn", "Schuemie", email = "[email protected]", role = c("aut")),

diff --git a/R/CohortCharacterizationDiagnostics.R b/R/CohortCharacterizationDiagnostics.R
@@ -48,12 +48,15 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
           covariateSettings = covariateSettings,
           aggregated = TRUE
         )
-    })
+    }
+  )
   populationSize <-
     attr(x = featureExtractionOutput, which = "metaData")$populationSize
   populationSize <-
-    dplyr::tibble(cohortId = names(populationSize) %>% as.numeric(),
-                  populationSize = populationSize)
+    dplyr::tibble(
+      cohortId = names(populationSize) %>% as.numeric(),
+      populationSize = populationSize
+    )
 
   if (!"analysisRef" %in% names(results)) {
     results$analysisRef <- featureExtractionOutput$analysisRef
@@ -82,8 +85,8 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
       dplyr::mutate(p = sumValue / populationSize)
 
     if (nrow(covariates %>%
-               dplyr::filter(p > 1) %>%
-               dplyr::collect()) > 0) {
+      dplyr::filter(p > 1) %>%
+      dplyr::collect()) > 0) {
       stop(
         paste0(
           "During characterization, population size (denominator) was found to be smaller than features Value (numerator).",
@@ -98,37 +101,37 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
       dplyr::rename(mean = averageValue) %>%
       dplyr::select(-populationSize)
 
-      if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
-        covariates <- covariates %>%
-          dplyr::select(
-            cohortId,
-            timeId,
-            covariateId,
-            sumValue,
-            mean,
-            sd
-          )
-          if (length(is.na(covariates$timeId)) > 0) {
-            covariates[is.na(covariates$timeId),]$timeId <- -1
-          }
-      } else {
-        covariates <- covariates %>%
-          dplyr::mutate(timeId = 0) %>%
-          dplyr::select(
-            cohortId,
-            timeId,
-            covariateId,
-            sumValue,
-            mean,
-            sd
-          )
-      }
-      if ("covariates" %in% names(results)) {
-        Andromeda::appendToTable(results$covariates, covariates)
-      } else {
-        results$covariates <- covariates
+    if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
+      covariates <- covariates %>%
+        dplyr::select(
+          cohortId,
+          timeId,
+          covariateId,
+          sumValue,
+          mean,
+          sd
+        )
+      if (length(is.na(covariates$timeId)) > 0) {
+        covariates[is.na(covariates$timeId), ]$timeId <- -1
       }
+    } else {
+      covariates <- covariates %>%
+        dplyr::mutate(timeId = 0) %>%
+        dplyr::select(
+          cohortId,
+          timeId,
+          covariateId,
+          sumValue,
+          mean,
+          sd
+        )
     }
+    if ("covariates" %in% names(results)) {
+      Andromeda::appendToTable(results$covariates, covariates)
+    } else {
+      results$covariates <- covariates
+    }
+  }
 
   if ("covariatesContinuous" %in% names(featureExtractionOutput) &&
     dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) {
@@ -151,12 +154,14 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
           sd
         )
       if (length(is.na(covariates$timeId)) > 0) {
-        covariates[is.na(covariates$timeId),]$timeId <- -1
+        covariates[is.na(covariates$timeId), ]$timeId <- -1
       }
     } else {
       covariates <- covariates %>%
-        dplyr::mutate(sumValue = -1,
-                      timeId = 0) %>%
+        dplyr::mutate(
+          sumValue = -1,
+          timeId = 0
+        ) %>%
         dplyr::select(
           cohortId,
           timeId,
@@ -179,10 +184,12 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
   }
 
   delta <- Sys.time() - startTime
-  ParallelLogger::logInfo("Cohort characterization took ",
-                          signif(delta, 3),
-                          " ",
-                          attr(delta, "units"))
+  ParallelLogger::logInfo(
+    "Cohort characterization took ",
+    signif(delta, 3),
+    " ",
+    attr(delta, "units")
+  )
   return(results)
 }
 
@@ -221,9 +228,10 @@ executeCohortCharacterization <- function(connection,
   )
 
   if (!incremental) {
-    for (outputFile in c(covariateValueFileName, covariateValueContFileName,
-                         covariateRefFileName, analysisRefFileName, timeRefFileName)) {
-
+    for (outputFile in c(
+      covariateValueFileName, covariateValueContFileName,
+      covariateRefFileName, analysisRefFileName, timeRefFileName
+    )) {
       if (file.exists(outputFile)) {
         ParallelLogger::logInfo("Not in incremental mode - Removing file", outputFile, " and replacing")
         unlink(outputFile)
@@ -309,10 +317,12 @@ executeCohortCharacterization <- function(connection,
     }
   }
   delta <- Sys.time() - startCohortCharacterization
-  ParallelLogger::logInfo("Running ",
-                          jobName,
-                          " took",
-                          signif(delta, 3),
-                          " ",
-                          attr(delta, "units"))
+  ParallelLogger::logInfo(
+    "Running ",
+    jobName,
+    " took",
+    signif(delta, 3),
+    " ",
+    attr(delta, "units")
+  )
 }
diff --git a/R/CohortRelationship.R b/R/CohortRelationship.R
@@ -122,7 +122,7 @@ runCohortRelationshipDiagnostics <-
             package = utils::packageName()
           )
         )
-      
+
       DatabaseConnector::renderTranslateExecuteSql(
         connection = connection,
         tempEmulationSchema = tempEmulationSchema,
@@ -135,7 +135,7 @@ runCohortRelationshipDiagnostics <-
         cohort_database_schema = cohortDatabaseSchema,
         cohort_table = cohortTable
       )
-      
+
       DatabaseConnector::renderTranslateQuerySqlToAndromeda(
         connection = connection,
         tempEmulationSchema = tempEmulationSchema,
@@ -210,13 +210,17 @@ executeCohortRelationshipDiagnostics <- function(connection,
 
   allCohortIds <- cohortDefinitionSet %>%
     dplyr::select(cohortId, checksum) %>%
-    dplyr::rename(targetCohortId = cohortId,
-                  targetChecksum = checksum) %>%
+    dplyr::rename(
+      targetCohortId = cohortId,
+      targetChecksum = checksum
+    ) %>%
     dplyr::distinct()
   combinationsOfPossibleCohortRelationships <- allCohortIds %>%
     tidyr::crossing(allCohortIds %>%
-                      dplyr::rename(comparatorCohortId = targetCohortId,
-                                    comparatorChecksum = targetChecksum)) %>%
+      dplyr::rename(
+        comparatorCohortId = targetCohortId,
+        comparatorChecksum = targetChecksum
+      )) %>%
     dplyr::filter(targetCohortId != comparatorCohortId) %>%
     dplyr::arrange(targetCohortId, comparatorCohortId) %>%
     dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))
@@ -230,7 +234,7 @@ executeCohortRelationshipDiagnostics <- function(connection,
 
   if (nrow(subset) > 0) {
     if (incremental &&
-        (nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) {
+      (nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) {
       ParallelLogger::logInfo(
         sprintf(
           " - Skipping %s target cohorts in incremental mode because the relationships has already been computed with other cohorts.",
@@ -240,12 +244,12 @@ executeCohortRelationshipDiagnostics <- function(connection,
     }
 
     if (incremental &&
-        (nrow(combinationsOfPossibleCohortRelationships) - (
-          nrow(
-            combinationsOfPossibleCohortRelationships %>%
+      (nrow(combinationsOfPossibleCohortRelationships) - (
+        nrow(
+          combinationsOfPossibleCohortRelationships %>%
             dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
-          )
-        )) > 0) {
+        )
+      )) > 0) {
       ParallelLogger::logInfo(
         sprintf(
           " - Skipping %s combinations in incremental mode because these were previously computed.",
@@ -333,7 +337,7 @@ executeCohortRelationshipDiagnostics <- function(connection,
       timeExecution(
         exportFolder,
         "runCohortRelationshipDiagnostics",
-        c(subset[start:end,]$targetCohortId %>% unique(), subset[start:end,]$comparatorCohortId %>% unique()),
+        c(subset[start:end, ]$targetCohortId %>% unique(), subset[start:end, ]$comparatorCohortId %>% unique()),
         parent = "executeCohortRelationshipDiagnostics",
         expr = {
           output <-
@@ -342,10 +346,12 @@ executeCohortRelationshipDiagnostics <- function(connection,
               cohortDatabaseSchema = cohortDatabaseSchema,
               tempEmulationSchema = tempEmulationSchema,
               cohortTable = cohortTable,
-              targetCohortIds = subset[start:end,]$targetCohortId %>% unique(),
-              comparatorCohortIds = subset[start:end,]$comparatorCohortId %>% unique(),
-              relationshipDays = dplyr::tibble(startDay = temporalStartDays,
-                                               endDay = temporalEndDays)
+              targetCohortIds = subset[start:end, ]$targetCohortId %>% unique(),
+              comparatorCohortIds = subset[start:end, ]$comparatorCohortId %>% unique(),
+              relationshipDays = dplyr::tibble(
+                startDay = temporalStartDays,
+                endDay = temporalEndDays
+              )
             )
         }
       )
@@ -364,26 +370,28 @@ executeCohortRelationshipDiagnostics <- function(connection,
       )
 
       recordTasksDone(
-        cohortId = subset[start:end,]$targetCohortId,
-        comparatorId = subset[start:end,]$comparatorCohortId,
-        targetChecksum = subset[start:end,]$targetChecksum,
-        comparatorChecksum = subset[start:end,]$comparatorChecksum,
+        cohortId = subset[start:end, ]$targetCohortId,
+        comparatorId = subset[start:end, ]$comparatorCohortId,
+        targetChecksum = subset[start:end, ]$targetChecksum,
+        comparatorChecksum = subset[start:end, ]$comparatorChecksum,
         task = "runCohortRelationship",
-        checksum = subset[start:end,]$checksum,
+        checksum = subset[start:end, ]$checksum,
         recordKeepingFile = recordKeepingFile,
         incremental = incremental
       )
       deltaIteration <- Sys.time() - startCohortRelationship
-      ParallelLogger::logInfo("    - Running Cohort Relationship iteration with batchsize ",
-                              batchSize,
-                              " from row number ",
-                              start,
-                              " to ",
-                              end,
-                              " took ",
-                              signif(deltaIteration, 3),
-                              " ",
-                              attr(deltaIteration, "units"))
+      ParallelLogger::logInfo(
+        "    - Running Cohort Relationship iteration with batchsize ",
+        batchSize,
+        " from row number ",
+        start,
+        " to ",
+        end,
+        " took ",
+        signif(deltaIteration, 3),
+        " ",
+        attr(deltaIteration, "units")
+      )
     }
   } else {
     ParallelLogger::logInfo("    - Skipping in incremental mode.")