Skip to content

Commit

Permalink
Merge pull request #408 from stitam/i407
Browse files Browse the repository at this point in the history
Ensure all CIDs are positive integers when using pc_prop()
  • Loading branch information
stitam committed Jul 11, 2023
2 parents 5c177df + c88a3d6 commit cfe0233
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 9 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# dev

## BUG FIXES

* `pc_prop()` returned `NA` without much further explanation if any of the queries were not positive integers. The updated function attempts to coerce queries to positive integers, only processes valid queries, and prints informative messages along the way if verbose messages are enabled.

# webchem 1.3.0

## NEW FEATURES
Expand Down
42 changes: 37 additions & 5 deletions R/pubchem.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,16 +283,19 @@ get_cid <-
#' \url{https://pubchem.ncbi.nlm.nih.gov/}
#' @import httr jsonlite
#'
#' @param cid character; Pubchem ID (CID).
#' @param properties character vector; properties to retrieve, e.g.
#' @param cid numeric; a vector of Pubchem IDs (CIDs). The input vector will be
#' coerced to a vector of positive integers. The function will return a row of
#' NAs for elements that cannot be coerced to positive integers.
#' @param properties character; a vector of properties to retrieve, e.g.
#' c("MolecularFormula", "MolecularWeight"). If NULL (default) all available
#' properties are retrieved. See
#' \url{https://pubchem.ncbi.nlm.nih.gov/docs/pug-rest}
#' for a list of all available properties.
#' @param verbose logical; should verbose output be printed to the console?
#' @param ... currently not used.
#'
#' @return a data.frame
#' @return a tibble; each row is a queried CID, each column is a requested
#' property.
#' @seealso \code{\link{get_cid}}, \code{\link{pc_sect}}
#' @references Wang, Y., J. Xiao, T. O. Suzek, et al. 2009 PubChem: A Public
#' Information System for
Expand Down Expand Up @@ -334,12 +337,39 @@ pc_prop <- function(cid, properties = NULL, verbose = getOption("verbose"), ...)

if (!ping_service("pc")) stop(webchem_message("service_down"))

cid_o <- cid

if (verbose) message("Coercing queries to positive integers. ", appendLF = FALSE)

cid <- suppressWarnings(as.integer(cid))

if (verbose) {
index <- which(is.na(cid) & !is.na(cid_o))
if (length(index) > 0) {
for (i in index) {
message(paste0(cid_o[index], " coerced to NA. "), appendLF = FALSE)
}
}
}

if (any(cid <= 0, na.rm = TRUE)) {
index <- which(cid <= 0)
cid[index] <- NA
if (verbose) {
for (i in index) {
message(paste0(cid_o[index], " coerced to NA. "), appendLF = FALSE)
}
}
}

if (verbose) message("Done.")

if (mean(is.na(cid)) == 1) {
if (verbose) webchem_message("na")
return(NA)
}

napos <- which(is.na(cid))
cid_o <- cid
cid <- cid[!is.na(cid)]
prolog <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
input <- "/compound/cid"
Expand Down Expand Up @@ -403,7 +433,9 @@ pc_prop <- function(cid, properties = NULL, verbose = getOption("verbose"), ...)
}
}}
rownames(out) <- NULL
class(out) <- c("pc_prop", "data.frame")
out$CID <- cid_o
out <- tibble::as_tibble(out)
class(out) <- c("pc_prop", class(out))
return(out)
}
else {
Expand Down
9 changes: 6 additions & 3 deletions man/pc_prop.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-chembl.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ test_that("chembl_query()", {
o4 <- chembl_query("CHEMBL771355", resource = "assay")

expect_type(o1, "list")
expect_equal(length(o1[[1]]), 34)
expect_equal(length(o1[[1]]), 35)
expect_equal(o1m[2], "OK (HTTP 200).")
expect_equal(length(o2), 2)
expect_equal(o3[[1]]$entity_type, "ASSAY")
Expand Down
19 changes: 19 additions & 0 deletions tests/testthat/test-pubchem.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,25 @@ test_that("pc_prop", {
c <- pc_prop("5564", properties = c("CanonicalSmiles", "InChiKey"))
expect_true(is.na(b))
expect_equal(ncol(c), 3)

cids <- c(5564, NA, -1, "balloon", "2244")
d <- pc_prop(cids, properties = "CanonicalSmiles")
expect_true(all(d$CID == cids, na.rm = TRUE))
expect_true(all(is.na(d$CanonicalSMILES[c(2:4)])))
expect_false(any(is.na(d$CanonicalSMILES[c(1,5)])))

d1m <- capture_messages(
pc_prop(cids, properties = "CanonicalSmiles", verbose = TRUE))
expect_true(all(
d1m == c(
"Coercing queries to positive integers. ",
"balloon coerced to NA. ",
"-1 coerced to NA. ",
"Done.\n",
"Querying. ",
"OK (HTTP 200).",
"\n"
)))
})

test_that("pc_synonyms", {
Expand Down

0 comments on commit cfe0233

Please sign in to comment.