Skip to content

Commit

Permalink
Merge pull request #1015 from OHDSI/develop
Browse files Browse the repository at this point in the history
Release 3.1.1
  • Loading branch information
azimov committed Dec 7, 2022
2 parents 5660d14 + 86608e2 commit 096169a
Show file tree
Hide file tree
Showing 95 changed files with 2,399 additions and 2,938 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CohortDiagnostics
Type: Package
Title: Diagnostics for OHDSI Cohorts
Version: 3.1.0
Version: 3.1.1
Date: 2022-07-20
Authors@R: c(
person("Gowtham", "Rao", email = "[email protected]", role = c("aut", "cre")),
Expand Down Expand Up @@ -43,7 +43,6 @@ Suggests:
CirceR,
DT,
Eunomia,
ggiraph,
ggplot2,
htmltools,
knitr,
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ export(getDataMigrator)
export(getDefaultCovariateSettings)
export(getDefaultVocabularyTableNames)
export(getResultsDataModelSpecifications)
export(launchCohortExplorer)
export(launchDiagnosticsExplorer)
export(migrateDataModel)
export(runCohortRelationshipDiagnostics)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
CohortDiagnostics 3.1.1
=======================
Changes:

1. Removed CohortExplorer app as it's now part of a [new package](https://github.com/ohdsi/cohortExplorer).

2. Added support for custom FeatureExtraction features

Bug Fixes:

1. Fixed error when checking for cdm_source table

2. Removal of `.data$` usage across package to fix tidyselect warning

CohortDiagnostics 3.1.0
=======================
Changes:
Expand Down
2 changes: 1 addition & 1 deletion R/CohortCharacterizationDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ executeCohortCharacterization <- function(connection,
startCohortCharacterization <- Sys.time()
subset <- subsetToRequiredCohorts(
cohorts = cohorts %>%
dplyr::filter(.data$cohortId %in% instantiatedCohorts),
dplyr::filter(cohortId %in% instantiatedCohorts),
task = task,
incremental = incremental,
recordKeepingFile = recordKeepingFile
Expand Down
32 changes: 16 additions & 16 deletions R/CohortRelationship.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ runCohortRelationshipDiagnostics <-

timePeriods <- relationshipDays %>%
dplyr::distinct() %>%
dplyr::arrange(.data$startDay, .data$endDay) %>%
dplyr::arrange(startDay, endDay) %>%
dplyr::mutate(timeId = dplyr::row_number())

ParallelLogger::logTrace(" - Creating Andromeda object to collect results")
Expand Down Expand Up @@ -161,12 +161,12 @@ runCohortRelationshipDiagnostics <-
resultsInAndromeda$cohortRelationships <-
resultsInAndromeda$cohortRelationships %>%
dplyr::inner_join(resultsInAndromeda$timePeriods, by = "timeId") %>%
dplyr::select(-.data$timeId) %>%
dplyr::select(-timeId) %>%
dplyr::arrange(
.data$cohortId,
.data$comparatorCohortId,
.data$startDay,
.data$endDay
cohortId,
comparatorCohortId,
startDay,
endDay
)
resultsInAndromeda$timePeriods <- NULL

Expand Down Expand Up @@ -209,17 +209,17 @@ executeCohortRelationshipDiagnostics <- function(connection,
startCohortRelationship <- Sys.time()

allCohortIds <- cohortDefinitionSet %>%
dplyr::select(.data$cohortId, .data$checksum) %>%
dplyr::rename(targetCohortId = .data$cohortId,
targetChecksum = .data$checksum) %>%
dplyr::select(cohortId, checksum) %>%
dplyr::rename(targetCohortId = cohortId,
targetChecksum = checksum) %>%
dplyr::distinct()
combinationsOfPossibleCohortRelationships <- allCohortIds %>%
tidyr::crossing(allCohortIds %>%
dplyr::rename(comparatorCohortId = .data$targetCohortId,
comparatorChecksum = .data$targetChecksum)) %>%
dplyr::filter(.data$targetCohortId != .data$comparatorCohortId) %>%
dplyr::arrange(.data$targetCohortId, .data$comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(.data$targetChecksum, .data$comparatorChecksum))
dplyr::rename(comparatorCohortId = targetCohortId,
comparatorChecksum = targetChecksum)) %>%
dplyr::filter(targetCohortId != comparatorCohortId) %>%
dplyr::arrange(targetCohortId, comparatorCohortId) %>%
dplyr::mutate(checksum = paste0(targetChecksum, comparatorChecksum))

subset <- subsetToRequiredCombis(
combis = combinationsOfPossibleCohortRelationships,
Expand All @@ -243,15 +243,15 @@ executeCohortRelationshipDiagnostics <- function(connection,
(nrow(combinationsOfPossibleCohortRelationships) - (
nrow(
combinationsOfPossibleCohortRelationships %>%
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))
dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
)
)) > 0) {
ParallelLogger::logInfo(
sprintf(
" - Skipping %s combinations in incremental mode because these were previously computed.",
nrow(combinationsOfPossibleCohortRelationships) - nrow(
combinationsOfPossibleCohortRelationships %>%
dplyr::filter(.data$targetCohortId %in% c(subset$targetCohortId))
dplyr::filter(targetCohortId %in% c(subset$targetCohortId))
)
)
)
Expand Down
106 changes: 53 additions & 53 deletions R/ConceptSets.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ combineConceptSetsFromCohorts <- function(cohorts) {
checkmate::reportAssertions(errorMessage)
checkmate::assertDataFrame(
x = cohorts %>% dplyr::select(
.data$cohortId,
.data$sql,
.data$json,
.data$cohortName
cohortId,
sql,
json,
cohortName
),
any.missing = FALSE,
min.cols = 4,
Expand Down Expand Up @@ -158,25 +158,25 @@ combineConceptSetsFromCohorts <- function(cohorts) {
return(NULL)
}
conceptSets <- dplyr::bind_rows(conceptSets) %>%
dplyr::arrange(.data$cohortId, .data$conceptSetId)
dplyr::arrange(cohortId, conceptSetId)

uniqueConceptSets <- conceptSets %>%
dplyr::select(.data$conceptSetExpression) %>%
dplyr::select(conceptSetExpression) %>%
dplyr::distinct() %>%
dplyr::mutate(uniqueConceptSetId = dplyr::row_number())

conceptSets <- conceptSets %>%
dplyr::inner_join(uniqueConceptSets, by = "conceptSetExpression") %>%
dplyr::distinct() %>%
dplyr::relocate(
.data$uniqueConceptSetId,
.data$cohortId,
.data$conceptSetId
uniqueConceptSetId,
cohortId,
conceptSetId
) %>%
dplyr::arrange(
.data$uniqueConceptSetId,
.data$cohortId,
.data$conceptSetId
uniqueConceptSetId,
cohortId,
conceptSetId
)
return(conceptSets)
}
Expand Down Expand Up @@ -371,7 +371,7 @@ runConceptSetDiagnostics <- function(connection,
# Save concept set metadata ---------------------------------------
conceptSetsExport <- makeDataExportable(
x = conceptSets %>%
dplyr::select(-.data$uniqueConceptSetId) %>%
dplyr::select(-uniqueConceptSetId) %>%
dplyr::distinct(),
tableName = "concept_sets",
minCellCount = minCellCount,
Expand All @@ -387,7 +387,7 @@ runConceptSetDiagnostics <- function(connection,

uniqueConceptSets <-
conceptSets[!duplicated(conceptSets$uniqueConceptSetId),] %>%
dplyr::select(-.data$cohortId, -.data$conceptSetId)
dplyr::select(-cohortId, -conceptSetId)

timeExecution(
exportFolder,
Expand Down Expand Up @@ -468,36 +468,36 @@ runConceptSetDiagnostics <- function(connection,
tidyr::tibble()

counts <- counts %>%
dplyr::rename(uniqueConceptSetId = .data$conceptSetId) %>%
dplyr::rename(uniqueConceptSetId = conceptSetId) %>%
dplyr::inner_join(
conceptSets %>% dplyr::select(
.data$uniqueConceptSetId,
.data$cohortId,
.data$conceptSetId
uniqueConceptSetId,
cohortId,
conceptSetId
),
by = "uniqueConceptSetId"
) %>%
dplyr::select(-.data$uniqueConceptSetId) %>%
dplyr::select(-uniqueConceptSetId) %>%
dplyr::mutate(databaseId = !!databaseId) %>%
dplyr::relocate(
.data$databaseId,
.data$cohortId,
.data$conceptSetId,
.data$conceptId
databaseId,
cohortId,
conceptSetId,
conceptId
) %>%
dplyr::distinct()

counts <- counts %>%
dplyr::group_by(
.data$databaseId,
.data$cohortId,
.data$conceptSetId,
.data$conceptId,
.data$sourceConceptId
databaseId,
cohortId,
conceptSetId,
conceptId,
sourceConceptId
) %>%
dplyr::summarise(
conceptCount = max(.data$conceptCount),
conceptSubjects = max(.data$conceptSubjects)
conceptCount = max(conceptCount),
conceptSubjects = max(conceptSubjects)
) %>%
dplyr::ungroup()

Expand Down Expand Up @@ -611,7 +611,7 @@ runConceptSetDiagnostics <- function(connection,
)
return(tidyr::tibble())
}
primaryCodesetIds <- primaryCodesetIds %>% dplyr::filter(.data$domain %in%
primaryCodesetIds <- primaryCodesetIds %>% dplyr::filter(domain %in%
c(domains$domain %>% unique()))
if (nrow(primaryCodesetIds) == 0) {
warning(
Expand All @@ -624,8 +624,8 @@ runConceptSetDiagnostics <- function(connection,
return(tidyr::tibble())
}
primaryCodesetIds <- conceptSets %>%
dplyr::filter(.data$cohortId %in% cohort$cohortId) %>%
dplyr::select(codeSetIds = .data$conceptSetId, .data$uniqueConceptSetId) %>%
dplyr::filter(cohortId %in% cohort$cohortId) %>%
dplyr::select(codeSetIds = conceptSetId, uniqueConceptSetId) %>%
dplyr::inner_join(primaryCodesetIds, by = "codeSetIds")

pasteIds <- function(row) {
Expand Down Expand Up @@ -712,7 +712,7 @@ runConceptSetDiagnostics <- function(connection,
counts <-
lapply(split(primaryCodesetIds, 1:nrow(primaryCodesetIds)), getCounts) %>%
dplyr::bind_rows() %>%
dplyr::arrange(.data$conceptCount)
dplyr::arrange(conceptCount)

if (nrow(counts) > 0) {
counts$cohortId <- cohort$cohortId
Expand Down Expand Up @@ -849,32 +849,32 @@ runConceptSetDiagnostics <- function(connection,
}
data <- dplyr::bind_rows(data) %>%
dplyr::distinct() %>%
dplyr::rename(uniqueConceptSetId = .data$codesetId) %>%
dplyr::rename(uniqueConceptSetId = codesetId) %>%
dplyr::inner_join(
conceptSets %>%
dplyr::select(
.data$uniqueConceptSetId,
.data$cohortId,
.data$conceptSetId
uniqueConceptSetId,
cohortId,
conceptSetId
),
by = "uniqueConceptSetId"
) %>%
dplyr::select(-.data$uniqueConceptSetId) %>%
dplyr::select(-uniqueConceptSetId) %>%
dplyr::select(
.data$cohortId,
.data$conceptSetId,
.data$conceptId,
.data$conceptCount,
.data$conceptSubjects
cohortId,
conceptSetId,
conceptId,
conceptCount,
conceptSubjects
) %>%
dplyr::group_by(
.data$cohortId,
.data$conceptSetId,
.data$conceptId
cohortId,
conceptSetId,
conceptId
) %>%
dplyr::summarise(
conceptCount = max(.data$conceptCount),
conceptSubjects = max(.data$conceptSubjects)
conceptCount = max(conceptCount),
conceptSubjects = max(conceptSubjects)
) %>%
dplyr::ungroup()
data <- makeDataExportable(
Expand Down Expand Up @@ -941,14 +941,14 @@ runConceptSetDiagnostics <- function(connection,
snakeCaseToCamelCase = TRUE
) %>%
dplyr::tibble() %>%
dplyr::rename(uniqueConceptSetId = .data$codesetId) %>%
dplyr::rename(uniqueConceptSetId = codesetId) %>%
dplyr::inner_join(conceptSets,
by = "uniqueConceptSetId"
) %>%
dplyr::select(
.data$cohortId,
.data$conceptSetId,
.data$conceptId
cohortId,
conceptSetId,
conceptId
)

resolvedConceptIds <- makeDataExportable(
Expand Down
5 changes: 4 additions & 1 deletion R/DataSourceInformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@ getCdmDataSourceInformation <-
}
}

if (!DatabaseConnector::dbExistsTable(conn = connection, name = "cdm_source")) {
if (!DatabaseConnector::existsTable(connection = connection,
databaseSchema = cdmDatabaseSchema,
tableName = "cdm_source")) {
warning("CDM Source table not found in CDM. Metadata on CDM source will be limited.")
return(NULL)
}

sqlCdmDataSource <-
"select * from @cdm_database_schema.cdm_source;"
cdmDataSource <-
Expand Down
20 changes: 10 additions & 10 deletions R/ExportCharacterization.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,30 @@ exportCharacterization <- function(characteristics,
} else if (dplyr::pull(dplyr::count(characteristics$covariateRef)) > 0) {
characteristics$filteredCovariates <-
characteristics$covariates %>%
dplyr::filter(.data$mean >= minCharacterizationMean) %>%
dplyr::filter(mean >= minCharacterizationMean) %>%
dplyr::mutate(databaseId = !!databaseId) %>%
dplyr::left_join(counts,
by = c("cohortId", "databaseId"),
copy = TRUE
) %>%
dplyr::mutate(
mean = dplyr::if_else(
.data$mean != 0 & .data$mean < minCellCount / as.numeric(.data$cohortEntries),
-minCellCount / as.numeric(.data$cohortEntries),
.data$mean
mean != 0 & mean < minCellCount / as.numeric(cohortEntries),
-minCellCount / as.numeric(cohortEntries),
mean
),
sumValue = dplyr::if_else(
.data$sumValue != 0 & .data$sumValue < minCellCount,
sumValue != 0 & sumValue < minCellCount,
-minCellCount,
.data$sumValue
sumValue
)
) %>%
dplyr::mutate(sd = dplyr::if_else(.data$mean >= 0, .data$sd, 0)) %>%
dplyr::mutate(sd = dplyr::if_else(mean >= 0, sd, 0)) %>%
dplyr::mutate(
mean = round(.data$mean, digits = 4),
sd = round(.data$sd, digits = 4)
mean = round(mean, digits = 4),
sd = round(sd, digits = 4)
) %>%
dplyr::select(-.data$cohortEntries, -.data$cohortSubjects) %>%
dplyr::select(-cohortEntries, -cohortSubjects) %>%
dplyr::distinct() %>% makeDataExportable(
tableName = "temporal_covariate_value",
minCellCount = minCellCount,
Expand Down
Loading

0 comments on commit 096169a

Please sign in to comment.