Skip to content

Commit

Permalink
Package maintenance
Browse files Browse the repository at this point in the history
  • Loading branch information
azimov committed Dec 20, 2022
1 parent 33c31b3 commit 334acd3
Show file tree
Hide file tree
Showing 62 changed files with 925 additions and 757 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: CohortDiagnostics
Type: Package
Title: Diagnostics for OHDSI Cohorts
Version: 3.1.1
Date: 2022-07-20
Version: 3.1.2
Date: 2022-12-19
Authors@R: c(
person("Gowtham", "Rao", email = "[email protected]", role = c("aut", "cre")),
person("Martijn", "Schuemie", email = "[email protected]", role = c("aut")),
Expand Down
110 changes: 60 additions & 50 deletions R/CohortCharacterizationDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,15 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
covariateSettings = covariateSettings,
aggregated = TRUE
)
})
}
)
populationSize <-
attr(x = featureExtractionOutput, which = "metaData")$populationSize
populationSize <-
dplyr::tibble(cohortId = names(populationSize) %>% as.numeric(),
populationSize = populationSize)
dplyr::tibble(
cohortId = names(populationSize) %>% as.numeric(),
populationSize = populationSize
)

if (!"analysisRef" %in% names(results)) {
results$analysisRef <- featureExtractionOutput$analysisRef
Expand Down Expand Up @@ -82,8 +85,8 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
dplyr::mutate(p = sumValue / populationSize)

if (nrow(covariates %>%
dplyr::filter(p > 1) %>%
dplyr::collect()) > 0) {
dplyr::filter(p > 1) %>%
dplyr::collect()) > 0) {
stop(
paste0(
"During characterization, population size (denominator) was found to be smaller than features Value (numerator).",
Expand All @@ -98,37 +101,37 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
dplyr::rename(mean = averageValue) %>%
dplyr::select(-populationSize)

if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
covariates <- covariates %>%
dplyr::select(
cohortId,
timeId,
covariateId,
sumValue,
mean,
sd
)
if (length(is.na(covariates$timeId)) > 0) {
covariates[is.na(covariates$timeId),]$timeId <- -1
}
} else {
covariates <- covariates %>%
dplyr::mutate(timeId = 0) %>%
dplyr::select(
cohortId,
timeId,
covariateId,
sumValue,
mean,
sd
)
}
if ("covariates" %in% names(results)) {
Andromeda::appendToTable(results$covariates, covariates)
} else {
results$covariates <- covariates
if (FeatureExtraction::isTemporalCovariateData(featureExtractionOutput)) {
covariates <- covariates %>%
dplyr::select(
cohortId,
timeId,
covariateId,
sumValue,
mean,
sd
)
if (length(is.na(covariates$timeId)) > 0) {
covariates[is.na(covariates$timeId), ]$timeId <- -1
}
} else {
covariates <- covariates %>%
dplyr::mutate(timeId = 0) %>%
dplyr::select(
cohortId,
timeId,
covariateId,
sumValue,
mean,
sd
)
}
if ("covariates" %in% names(results)) {
Andromeda::appendToTable(results$covariates, covariates)
} else {
results$covariates <- covariates
}
}

if ("covariatesContinuous" %in% names(featureExtractionOutput) &&
dplyr::pull(dplyr::count(featureExtractionOutput$covariatesContinuous)) > 0) {
Expand All @@ -151,12 +154,14 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
sd
)
if (length(is.na(covariates$timeId)) > 0) {
covariates[is.na(covariates$timeId),]$timeId <- -1
covariates[is.na(covariates$timeId), ]$timeId <- -1
}
} else {
covariates <- covariates %>%
dplyr::mutate(sumValue = -1,
timeId = 0) %>%
dplyr::mutate(
sumValue = -1,
timeId = 0
) %>%
dplyr::select(
cohortId,
timeId,
Expand All @@ -179,10 +184,12 @@ getCohortCharacteristics <- function(connectionDetails = NULL,
}

delta <- Sys.time() - startTime
ParallelLogger::logInfo("Cohort characterization took ",
signif(delta, 3),
" ",
attr(delta, "units"))
ParallelLogger::logInfo(
"Cohort characterization took ",
signif(delta, 3),
" ",
attr(delta, "units")
)
return(results)
}

Expand Down Expand Up @@ -221,9 +228,10 @@ executeCohortCharacterization <- function(connection,
)

if (!incremental) {
for (outputFile in c(covariateValueFileName, covariateValueContFileName,
covariateRefFileName, analysisRefFileName, timeRefFileName)) {

for (outputFile in c(
covariateValueFileName, covariateValueContFileName,
covariateRefFileName, analysisRefFileName, timeRefFileName
)) {
if (file.exists(outputFile)) {
ParallelLogger::logInfo("Not in incremental mode - Removing file", outputFile, " and replacing")
unlink(outputFile)
Expand Down Expand Up @@ -309,10 +317,12 @@ executeCohortCharacterization <- function(connection,
}
}
delta <- Sys.time() - startCohortCharacterization
ParallelLogger::logInfo("Running ",
jobName,
" took",
signif(delta, 3),
" ",
attr(delta, "units"))
ParallelLogger::logInfo(
"Running ",
jobName,
" took",
signif(delta, 3),
" ",
attr(delta, "units")
)
}
72 changes: 40 additions & 32 deletions R/CohortRelationship.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ runCohortRelationshipDiagnostics <-
package = utils::packageName()
)
)

DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
tempEmulationSchema = tempEmulationSchema,
Expand All @@ -135,7 +135,7 @@ runCohortRelationshipDiagnostics <-
cohort_database_schema = cohortDatabaseSchema,
cohort_table = cohortTable
)

DatabaseConnector::renderTranslateQuerySqlToAndromeda(
connection = connection,
tempEmulationSchema = tempEmulationSchema,
Expand Down Expand Up @@ -210,13 +210,17 @@ executeCohortRelationshipDiagnostics <- function(connection,

allCohortIds <- cohortDefinitionSet %>%
dplyr::select(cohortId, checksum) %>%
dplyr::rename(targetCohortId = cohortId,
targetChecksum = checksum) %>%
dplyr::rename(
targetCohortId = cohortId,
targetChecksum = checksum
) %>%
dplyr::distinct()
combinationsOfPossibleCohortRelationships <- allCohortIds %>%
tidyr::crossing(allCohortIds %>%
dplyr::rename(comparatorCohortId = targetCohortId,
comparatorChecksum = targetChecksum)) %>%
dplyr::rename(
comparatorCohortId = targetCohortId,
comparatorChecksum = targetChecksum
)) %>%
dplyr::filter(targetCohortId != comparatorCohortId) %>%
dplyr::arrange(targetCohortId, comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))
Expand All @@ -230,7 +234,7 @@ executeCohortRelationshipDiagnostics <- function(connection,

if (nrow(subset) > 0) {
if (incremental &&
(nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) {
(nrow(cohortDefinitionSet) - (length(subset$targetCohortId %>% unique()))) > 0) {
ParallelLogger::logInfo(
sprintf(
" - Skipping %s target cohorts in incremental mode because the relationships has already been computed with other cohorts.",
Expand All @@ -240,12 +244,12 @@ executeCohortRelationshipDiagnostics <- function(connection,
}

if (incremental &&
(nrow(combinationsOfPossibleCohortRelationships) - (
nrow(
combinationsOfPossibleCohortRelationships %>%
(nrow(combinationsOfPossibleCohortRelationships) - (
nrow(
combinationsOfPossibleCohortRelationships %>%
dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
)
)) > 0) {
)
)) > 0) {
ParallelLogger::logInfo(
sprintf(
" - Skipping %s combinations in incremental mode because these were previously computed.",
Expand Down Expand Up @@ -333,7 +337,7 @@ executeCohortRelationshipDiagnostics <- function(connection,
timeExecution(
exportFolder,
"runCohortRelationshipDiagnostics",
c(subset[start:end,]$targetCohortId %>% unique(), subset[start:end,]$comparatorCohortId %>% unique()),
c(subset[start:end, ]$targetCohortId %>% unique(), subset[start:end, ]$comparatorCohortId %>% unique()),
parent = "executeCohortRelationshipDiagnostics",
expr = {
output <-
Expand All @@ -342,10 +346,12 @@ executeCohortRelationshipDiagnostics <- function(connection,
cohortDatabaseSchema = cohortDatabaseSchema,
tempEmulationSchema = tempEmulationSchema,
cohortTable = cohortTable,
targetCohortIds = subset[start:end,]$targetCohortId %>% unique(),
comparatorCohortIds = subset[start:end,]$comparatorCohortId %>% unique(),
relationshipDays = dplyr::tibble(startDay = temporalStartDays,
endDay = temporalEndDays)
targetCohortIds = subset[start:end, ]$targetCohortId %>% unique(),
comparatorCohortIds = subset[start:end, ]$comparatorCohortId %>% unique(),
relationshipDays = dplyr::tibble(
startDay = temporalStartDays,
endDay = temporalEndDays
)
)
}
)
Expand All @@ -364,26 +370,28 @@ executeCohortRelationshipDiagnostics <- function(connection,
)

recordTasksDone(
cohortId = subset[start:end,]$targetCohortId,
comparatorId = subset[start:end,]$comparatorCohortId,
targetChecksum = subset[start:end,]$targetChecksum,
comparatorChecksum = subset[start:end,]$comparatorChecksum,
cohortId = subset[start:end, ]$targetCohortId,
comparatorId = subset[start:end, ]$comparatorCohortId,
targetChecksum = subset[start:end, ]$targetChecksum,
comparatorChecksum = subset[start:end, ]$comparatorChecksum,
task = "runCohortRelationship",
checksum = subset[start:end,]$checksum,
checksum = subset[start:end, ]$checksum,
recordKeepingFile = recordKeepingFile,
incremental = incremental
)
deltaIteration <- Sys.time() - startCohortRelationship
ParallelLogger::logInfo(" - Running Cohort Relationship iteration with batchsize ",
batchSize,
" from row number ",
start,
" to ",
end,
" took ",
signif(deltaIteration, 3),
" ",
attr(deltaIteration, "units"))
ParallelLogger::logInfo(
" - Running Cohort Relationship iteration with batchsize ",
batchSize,
" from row number ",
start,
" to ",
end,
" took ",
signif(deltaIteration, 3),
" ",
attr(deltaIteration, "units")
)
}
} else {
ParallelLogger::logInfo(" - Skipping in incremental mode.")
Expand Down
Loading

0 comments on commit 334acd3

Please sign in to comment.