Skip to content

Commit a3eb289

Browse files
committed
v0.3.0: added functions to assess dois (verify, retracted end exist)
1 parent aed36d9 commit a3eb289

11 files changed

+284
-7
lines changed

DESCRIPTION

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: IPBES.R
22
Type: Package
33
Title: Tool functions used by the Data and Knowledge Technical Support Unit of IPBES
4-
Version: 0.2.8
4+
Version: 0.3.0
55
Date: 2023-11-13
66
Authors@R:
77
c(person(given = "Rainer M.",
@@ -22,7 +22,8 @@ Imports:
2222
tidygraph,
2323
ggplot2,
2424
ggraph,
25-
networkD3
25+
networkD3,
26+
httr2
2627
Suggests:
2728
roxyglobals,
2829
tinytest

NAMESPACE

+13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(abbreviate_authors)
4+
export(doi_exists)
5+
export(doi_not_retracted)
6+
export(doi_valid)
47
export(get_count)
58
export(plot_snowball)
69
export(plot_snowball_interactive)
@@ -9,6 +12,7 @@ importFrom(dplyr,arrange)
912
importFrom(dplyr,desc)
1013
importFrom(dplyr,filter)
1114
importFrom(dplyr,full_join)
15+
importFrom(dplyr,left_join)
1216
importFrom(dplyr,mutate)
1317
importFrom(dplyr,relocate)
1418
importFrom(dplyr,rename)
@@ -29,11 +33,20 @@ importFrom(ggraph,geom_node_point)
2933
importFrom(ggraph,ggraph)
3034
importFrom(ggraph,scale_edge_width)
3135
importFrom(ggraph,theme_graph)
36+
importFrom(httr2,req_error)
37+
importFrom(httr2,req_headers)
38+
importFrom(httr2,req_perform)
39+
importFrom(httr2,req_retry)
40+
importFrom(httr2,req_throttle)
41+
importFrom(httr2,request)
42+
importFrom(httr2,resp_status)
3243
importFrom(networkD3,JS)
3344
importFrom(networkD3,forceNetwork)
3445
importFrom(openalexR,oa_query)
3546
importFrom(openalexR,oa_request)
3647
importFrom(openalexR,snowball2df)
3748
importFrom(tidygraph,as_tbl_graph)
49+
importFrom(utils,download.file)
50+
importFrom(utils,read.csv)
3851
importFrom(utils,tail)
3952
importFrom(writexl,write_xlsx)

R/doi_exists.R

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
library(httr2)
2+
3+
4+
5+
#' Check if DOIs exist
6+
#'
7+
#' This function checks if a given list of DOIs exist by sending HTTP requests to the DOI resolver.
8+
#'
9+
#' @details This function uses the httr package to send HTTP GET requests to the DOI resolver and checks the response status code. A status code of 200 indicates that the DOI exists, while any other status code indicates that the DOI does not exist.
10+
#'
11+
#' @param dois A character vector of DOIs to check.
12+
#' @param cache_file A file name of the cache to be used, i.e. the confirmed existing dois. The format is a character vector with the DOIs which exist. If the cache exist, it will be updated at the end.
13+
#'
14+
#' @return A named logical vector indicating whether each DOI does exist or not, names are the dois.
15+
#'
16+
#' @examples
17+
#' dois <- c("sbcd1234", "10.1234/abcd", "10.1002/jcb.23190", "10.47366/sabia.v5n1a3")
18+
#' doi_exists(dois)
19+
#' # Output: [1] FALSE TRUE
20+
#'
21+
#' @importFrom httr2 request req_headers req_throttle req_retry req_error req_perform resp_status
22+
#'
23+
#' @export
24+
doi_exists <- function(dois, cache_file = NULL) {
25+
if (is.null(cache_file)) {
26+
dois_to_check <- 1:length(dois)
27+
cache <- NULL
28+
} else {
29+
if (file.exists(cache_file)) {
30+
cache <- readRDS(cache_file)
31+
dois_to_check <- (1:length(dois))[!(dois %in% cache)]
32+
} else {
33+
dois_to_check <- 1:length(dois)
34+
cache <- NULL
35+
}
36+
}
37+
38+
total <- length(dois_to_check)
39+
result_dois_checked <- sapply(
40+
dois_to_check,
41+
function(i) {
42+
# Print progress
43+
cat(sprintf("\rProgress: %d of %d", i, total))
44+
45+
if (is.na(dois[i])) {
46+
return(NA)
47+
} else {
48+
tryCatch(
49+
{
50+
status <- httr2::request(paste0("https://doi.org/", dois[i])) |>
51+
httr2::req_headers(
52+
noredirect = TRUE,
53+
type = "URL"
54+
) |>
55+
httr2::req_throttle(
56+
rate = 30 / 60
57+
) |>
58+
httr2::req_retry(
59+
max_tries = 5
60+
) |>
61+
req_error(
62+
is_error = function(e) {
63+
FALSE
64+
}
65+
) |>
66+
httr2::req_perform() |>
67+
httr2::resp_status()
68+
return(status == 200)
69+
},
70+
error = function(e) {
71+
return(NA)
72+
}
73+
)
74+
}
75+
}
76+
)
77+
78+
result <- dois
79+
result[] <- NA
80+
result <- as.logical(result)
81+
names(result) <- dois
82+
83+
result[dois_to_check] <- result_dois_checked
84+
85+
if (!is.null(cache)) {
86+
result[dois[(dois %in% cache)]] <- TRUE
87+
cache <- c(cache, result[result])
88+
cache <- cache[!duplicated(cache)]
89+
saveRDS(cache, cache_file)
90+
}
91+
return(result)
92+
}

R/doi_retracted.R

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#' Check if DOIs are not retracted
2+
#'
3+
#' This function checks if a given list of DOIs (Digital Object Identifiers) are retracted.
4+
#' It uses the Crossref API to query the Retraction Watch database.
5+
#'
6+
#' @param dois A character vector of DOIs to be checked.
7+
#' @param cache_file A file name of the cache to be used, i.e. the downloaded retraction data. THe file is an `rds` file as downloaded from the retractionwatch site. If NULL, the data will not be cached.
8+
#' @param email An optional email address to be included in the API request [RECOMMENDET!].
9+
#'
10+
#' @return A named logical vector indicating whether each DOI is retracted (`FALSE`) or or not (`TRUE`), names are the dois.
11+
#'
12+
#' @importFrom utils download.file read.csv
13+
#' @examples
14+
#' # Check if a single DOI is retracted
15+
#' doi_not_retracted("10.1234/abcd")
16+
#'
17+
#' # Check if multiple DOIs are retracted
18+
#' dois <- c("sbcd1234", "10.1234/abcd", "10.1002/jcb.23190", "10.47366/sabia.v5n1a3")
19+
#' doi_not_retracted(dois)
20+
#'
21+
#' @export
22+
doi_not_retracted <- function(dois, cache_file = NULL, email = NULL) {
23+
if (is.null(cache_file)) {
24+
tmpfile <- tempfile(fileext = ".csv")
25+
utils::download.file(
26+
url = paste0("https://api.labs.crossref.org/data/retractionwatch?", email),
27+
destfile = tmpfile
28+
)
29+
cache <- read.csv(tmpfile, stringsAsFactors = FALSE)
30+
unlink(tmpfile)
31+
} else {
32+
if (file.exists(cache_file)) {
33+
cache <- readRDS(cache_file)
34+
} else {
35+
tmpfile <- tempfile(fileext = ".csv")
36+
utils::download.file(
37+
url = paste0("https://api.labs.crossref.org/data/retractionwatch?", email),
38+
destfile = tmpfile
39+
)
40+
cache <- read.csv(tmpfile, stringsAsFactors = FALSE)
41+
saveRDS(cache, cache_file)
42+
unlink(tmpfile)
43+
}
44+
}
45+
46+
result <- !(dois %in% cache$OriginalPaperDOI)
47+
names(result) <- dois
48+
return(result)
49+
}

R/doi_valid.R

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#' Validate DOIs
2+
#'
3+
#' This function validates a vector of DOIs (Digital Object Identifiers) using a regular expression pattern.#'
4+
#' It is taken from https://github.com/libscie/retractcheck/blob/23f1e5c7d572d9470583288d951d1bad98392f82/R/utils.R#L16
5+
6+
#' @param dois A vector of DOIs to be validated.
7+
#'
8+
#' @return A named logical vector indicating whether each DOI is valid or not, names are the dois.
9+
#'
10+
#' @details The function uses a regular expression pattern to validate the format of each DOI in the input vector.
11+
#' The regular expression pattern is based on the pattern used by the \code{retractcheck} package and can be found at
12+
#' https://github.com/libscie/retractcheck/blob/23f1e5c7d572d9470583288d951d1bad98392f82/R/utils.R#L16.
13+
#' Alternatively, you can uncomment the second regular expression pattern and comment out the first one to use the pattern
14+
#' from the \code{rorcid} package, which can be found at https://github.com/ropensci-archive/rorcid/blob/master/R/check_dois.R.
15+
#'
16+
#' @examples
17+
#' dois <- c("sbcd1234", "10.1234/abcd", "10.1002/jcb.23190", "10.47366/sabia.v5n1a3")
18+
#' doi_valid(dois)
19+
#'
20+
#' @export
21+
doi_valid <- function(dois) {
22+
## regex <- "^10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$" # https://github.com/libscie/retractcheck/blob/23f1e5c7d572d9470583288d951d1bad98392f82/R/utils.R#L16
23+
regex <- "\\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\\S)+)\\b" # https://github.com/ropensci-archive/rorcid/blob/master/R/check_dois.R
24+
result <- grepl(
25+
x = dois, pattern = regex,
26+
perl = TRUE, ignore.case = TRUE
27+
)
28+
names(result) <- dois
29+
return(result)
30+
}

R/globals.R

-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Generated by roxyglobals: do not edit by hand
22

33
utils::globalVariables(c(
4-
"x", # <plot_snowball>
54
"index", # <plot_snowball>
65
"cited_by_count_by_year", # <plot_snowball>
76
"type", # <plot_snowball>
@@ -11,7 +10,6 @@ utils::globalVariables(c(
1110
"cited_by_count", # <plot_snowball>
1211
"Freq", # <to_xlsx>
1312
"Var1", # <to_xlsx>
14-
"flat_snow", # <to_xlsx>
1513
"id", # <to_xlsx>
1614
"publication_year", # <to_xlsx>
1715
"cited_by_count", # <to_xlsx>

R/plot_snowball_interactive.R

+4-2
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,23 @@
33
#' This function creates a interactive snowball seaarch network using the networkD3 package.
44
#'
55
#' @param snowball The snowball object containing the network data. The object is returned from the
6-
#' \link[openalexR]{oa_snowball function in the `openalexR`` package
6+
#' \link[openalexR]{oa_snowball} function in the `openalexR`` package
77
#' @param key_works A data frame, as returned. e.g. by `oa_fetch(entity = "works", ...`,
88
#' containing the key-works from the snowball search which will be highlighted in the network.
99
#' @param file The file name to save the network to. TThe directory has tro esxist.
1010
#' Default: `NULL`, i.e. not saved.
1111
#'
1212
#' @importFrom networkD3 forceNetwork JS
1313
#'
14+
#' @importFrom dplyr mutate select rename left_join
15+
#'
1416
#' @return A networkD3 object representing the interactive network plot.
1517
#'
1618
#' @md
1719
#'
1820
#' @examples
1921
#' \dontrun{
20-
#' plot_snowball_interactive(snowball)
22+
#' plot_snowball_interactive(snowball, key_works, file)
2123
#' }
2224
#' @export
2325
plot_snowball_interactive <- function(snowball, key_works, file) {

man/doi_exists.Rd

+28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/doi_not_retracted.Rd

+31
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/doi_valid.Rd

+30
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/plot_snowball_interactive.Rd

+4-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)