Skip to content

Commit

Permalink
preparing for CRAN submission
Browse files Browse the repository at this point in the history
  • Loading branch information
mmatyi committed Mar 25, 2024
1 parent 6380dfd commit 9a78022
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 26 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: restatapi
Type: Package
Title: Search and Retrieve Data from Eurostat Database
Date: 2024-03-25
Version: 0.23.0
Version: 0.23.1
Encoding: UTF-8
Authors@R: c(person("Mátyás", "Mészáros", email = "[email protected]", role = c("aut", "cre")),
person("Sebastian", "Weinand", role = "ctb"))
Expand Down
8 changes: 7 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# restatapi 0.23.1

- adding option `lang` to the `get_eurostat_data()` function to be able to use the German or French DSD for the `filters`
- correction of the `get_eurostat_toc()` function when `mode="txt"` and `lang="fr"`
- checking if `restatapi_cores` is more than the maximum number of available cores

# restatapi 0.23.0

- correction of `get_eurostat_toc()` function and the functions using the `check_toc` options because the content of the XML TOC has changed
- correction of the `get_eurostat_toc()` function and the functions using the `check_toc` options because the content of the XML TOC has changed

# restatapi 0.22.9

Expand Down
10 changes: 7 additions & 3 deletions R/get_eurostat_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' If a named list is used, then the name of the list elements should be the concepts from the DSD and the provided values will be used to filter the dataset for the given concept.
#' The default is \code{NULL}, in this case the whole dataset is returned via the bulk download. To filter by time see \code{date_filter} below.
#' If the dataset still has more observations after filtering than the limit per query via the API, then the raw download is used to retrieve the whole dataset and the filter is applied on the local computer. This option can be disabled with the \code{local_filter=FALSE} parameter.
#' @param lang a character string either \code{en}, \code{de} or \code{fr} to define the language version for the DSD to search in for the \code{filters}. The default is \code{en} - English.
#' @param exact_match a boolean with the default value \code{TRUE}, indicating whether the strings provided in \code{filters} shall be matched exactly as they are or as a pattern.
#' @param date_filter a vector which can be numeric or character containing dates to filter the dataset. If a date is defined as a character string, it should follow the format yyyy[-mm][-dd], where the month and the day parts are optional.
#' If a date filter is applied, only part of the dataset is downloaded through the API.
Expand Down Expand Up @@ -139,7 +140,8 @@
#' label=TRUE,
#' name=FALSE)
#' dt<-get_eurostat_data("agr_r_milkpr",
#' filters=c("BE$","Hungary"),
#' filters=c("BE$","Ungarn"),
#' lang="de",
#' date_filter="2007-06<",
#' keep_flags=TRUE)
#' dt<-get_eurostat_data("nama_10_a10_e",
Expand Down Expand Up @@ -172,6 +174,7 @@

get_eurostat_data <- function(id,
filters=NULL,
lang="en",
exact_match=TRUE,
date_filter=NULL,
label=FALSE,
Expand All @@ -194,6 +197,7 @@ get_eurostat_data <- function(id,
verbose<-verbose|getOption("restatapi_verbose",FALSE)
update_cache<-update_cache|getOption("restatapi_update",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if (getOption("restatapi_cores",1L)>=parallel::detectCores()) options(restatapi_cores=parallel::detectCores()-1)
# if (verbose) {message("\nget_eurostat_data - API version:",get("rav",envir=restatapi::.restatapi_env))}
tbc<-cr<-TRUE # to be continued for the next steps / cache result data.table
if (verbose) {message("get_eurostat_data - footer code option value at start:",paste(getOption("code_opt",NULL),collapse=", "))}
Expand Down Expand Up @@ -303,7 +307,7 @@ get_eurostat_data <- function(id,
{
if (!is.null(filters))#filter defined => create filter table and filter url
{
dsd<-get_eurostat_dsd(id,verbose=verbose)
dsd<-get_eurostat_dsd(id,lang=lang,verbose=verbose)
if (is.null(dsd)){
message("Could not download the DSD. The filter is ignored")
filters_url<-NULL
Expand Down Expand Up @@ -716,7 +720,7 @@ get_eurostat_data <- function(id,
if (label & !is.null(restat)) #label data
{
if (verbose) {message("get_eurostat_data - restat - nrow:",nrow(restat),";ncol:",ncol(restat))}
dsd<-restatapi::get_eurostat_dsd(id,verbose=verbose)
dsd<-restatapi::get_eurostat_dsd(id,lang=lang,verbose=verbose)
if (!is.null(dsd)){
if (verbose) {message("get_eurostat_data - dsd - nrow:",nrow(dsd),";ncol:",ncol(dsd))}
cn<-colnames(restat)[!(colnames(restat) %in% c("time","values","flags"))]
Expand Down
1 change: 1 addition & 0 deletions R/get_eurostat_dsd.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ get_eurostat_dsd <- function(id,
verbose=FALSE,...) {
verbose<-verbose|getOption("restatapi_verbose",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if (getOption("restatapi_cores",1L)>=parallel::detectCores()) options(restatapi_cores=parallel::detectCores()-1)
# if (verbose) {message("\nget_eurostat_dsd - API version:",get("rav",envir=restatapi::.restatapi_env))}
tbc<-TRUE #to be continued for the next steps
if (is.null(id)){
Expand Down
10 changes: 1 addition & 9 deletions R/get_eurostat_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ get_eurostat_raw <- function(id,
verbose<-verbose|getOption("restatapi_verbose",FALSE)
update_cache<-update_cache|getOption("restatapi_update", FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if (getOption("restatapi_cores",1L)>=parallel::detectCores()) options(restatapi_cores=parallel::detectCores()-1)
tbc<-TRUE #to be continued to the next steps
if (verbose) {message("\nget_eurostat_raw - API version:",get("rav",envir=restatapi::.restatapi_env))}
if((!exists(".restatapi_env")|(length(list(...))>0))){
Expand Down Expand Up @@ -232,8 +233,6 @@ get_eurostat_raw <- function(id,
restat_raw<-data.table::copy(raw)
restat_raw[, c("DATAFLOW", "LAST UPDATE") := NULL]
rm(raw)
# restat_raw$OBS_VALUE<-gsub('^\\:$',"",restat_raw$OBS_VALUE,perl=TRUE)
# restat_raw$OBS_VALUE<-gsub('[^0-9\\.\\-\\:]',"",restat_raw$OBS_VALUE,perl=TRUE)
restat_raw<-data.table::data.table(restat_raw,stringsAsFactors=stringsAsFactors)
}
} else if (mode=="txt"){
Expand Down Expand Up @@ -283,14 +282,9 @@ get_eurostat_raw <- function(id,
rm(raw)
data.table::setnames(raw_melted,2:3,c(rname,"values"))
raw_melted<-raw_melted[raw_melted$values!=":",]
# if (check_toc|rav==1){
# FREQ<-gsub("MD","D",gsub('[0-9\\.\\-]',"",raw_melted$time))
# FREQ[FREQ==""]<-"A"
# }
restat_raw<-data.table::as.data.table(data.table::tstrsplit(raw_melted$bdown,",",fixed=TRUE),stringsAsFactors=stringsAsFactors)
data.table::setnames(restat_raw,cnames)
restat_raw<-data.table::data.table(restat_raw,raw_melted[,2:3],stringsAsFactors=stringsAsFactors)
# if (check_toc|rav==1) {restat_raw<-data.table::data.table(FREQ,restat_raw)}
if (keep_flags) {restat_raw$flags<-gsub('[0-9\\.\\-\\s\\:]',"",restat_raw$values,perl=TRUE)}
restat_raw$values<-gsub('^\\:$',"",restat_raw$values,perl=TRUE)
restat_raw$values<-gsub('[^0-9\\.\\-\\:]',"",restat_raw$values,perl=TRUE)
Expand All @@ -307,12 +301,10 @@ get_eurostat_raw <- function(id,
# }
} else if (mode=="xml"){
format<-switch(rav, "1" = "zip", "2" = "gz")
# if (check_toc) {format<-"zip"}
if (verbose) {message("get_eurostat_raw - file format: ",format)}
sdmx_file<-restatapi::get_compressed_sdmx(bulk_url,verbose=verbose,format=format)
if(!is.null(sdmx_file)){
xml_mark<-switch(rav, "1" = ".//data:Series", "2" = ".//Series")
# if (check_toc) {xml_mark<-".//data:Series"}
xml_leafs<-xml2::xml_find_all(sdmx_file,xml_mark)
if (verbose) {message("get_eurostat_raw - class(xml_leafs): ",class(xml_leafs),
"\nget_eurostat_raw - number of nodes: ",length(xml_leafs),
Expand Down
10 changes: 4 additions & 6 deletions R/get_eurostat_toc.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ get_eurostat_toc<-function(mode="xml",
toc<-xml_leafs<-NULL
tbc<-TRUE
verbose<-verbose|getOption("restatapi_verbose",FALSE)
update_cache<-update_cache|getOption("restatapi_update",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if (getOption("restatapi_cores",1L)>=parallel::detectCores()) options(restatapi_cores=parallel::detectCores()-1)
if (verbose) {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
if((!exists(".restatapi_env")|(length(list(...))>0))){
if ((length(list(...))>0)) {
Expand All @@ -77,8 +80,6 @@ get_eurostat_toc<-function(mode="xml",
}
}
# if (verbose) {message("get_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))}
update_cache<-update_cache|getOption("restatapi_update",FALSE)
dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env))
if(any(grepl("get_eurostat_bulk|get_eurostat_data|get_eurostat_raw",as.character(sys.calls()),perl=TRUE))) {update_cache<-FALSE}

if ((cache) & (!update_cache)) {
Expand All @@ -103,7 +104,7 @@ get_eurostat_toc<-function(mode="xml",
tbc<-FALSE
})
if (tbc) {
tryCatch({toc<-utils::read.csv(temp,header=TRUE,sep="\t",stringsAsFactors=FALSE)},
tryCatch({toc<-data.table::fread(temp,header=TRUE,sep="\t",stringsAsFactors=FALSE)},
error = function(e) {
if (verbose) {message("get_eurostat_toc - Error during the reading of the tsv version of the TOC file:",'\n',paste(unlist(e),collapse="\n"))}
else {message("There is an error by the reading of the downloaded txt TOC file. Run the same command with verbose=TRUE option to get more info on the issue.")}
Expand Down Expand Up @@ -170,12 +171,9 @@ get_eurostat_toc<-function(mode="xml",
})
}
if (exists("leafs")){
# toc<-data.frame(t(sapply(leafs, '[', seq(max(lengths(leafs))))),stringsAsFactors=FALSE)[,c(1:19)]
# type<-as.character(unlist(lapply(xml_leafs,xml2::xml_attrs)))
toc<-data.table::rbindlist(leafs,fill=TRUE)[,c(1:19)]
type<-as.character(unlist(lapply(xml_leafs,xml2::xml_attr,attr="type")))
toc<-cbind(toc,type)
# names(toc)<-c(sub("\\.$","",paste(xml2::xml_name(xml2::xml_children(xml_leafs[1])),sub(".*)","",as.character(xml2::xml_attrs(xml2::xml_children(xml_leafs[1])))),sep="."),perl=TRUE),"type")
keep<-c(paste0("title.",lang),"code","type","lastUpdate","lastModified","dataStart","dataEnd","values",paste0("unit.",lang),paste0("shortDescription.",lang),"metadata.html","metadata.sdmx","downloadLink.tsv")
toc<-toc[,keep,with=FALSE]
names(toc)<-c("title","code","type","lastUpdate","lastModified","dataStart","dataEnd","values","unit","shortDescription","metadata.html","metadata.sdmx","downloadLink.tsv")
Expand Down
11 changes: 11 additions & 0 deletions inst/tinytest/test_restatapi.R
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,17 @@ if (grepl("\\.amzn|-aws|5.4.109+|-azure ",Sys.info()['release'])) {

}

# a82: the French and German txt versions of the TOC are expected to have the
# same number of rows. The comparison only runs when both calls returned a
# non-NULL result.
fr_txt_toc<-get_eurostat_toc(mode="txt",lang="fr")
de_txt_toc<-get_eurostat_toc(mode="txt",lang="de")
# `&&` is the scalar, short-circuiting operator intended for `if` conditions
if (!is.null(fr_txt_toc) && !is.null(de_txt_toc)){
  # expect_equal reports both row counts on failure, unlike expect_true(a==b)
  expect_equal(nrow(fr_txt_toc),nrow(de_txt_toc)) #a82
}

# a83: filtering with the German label ("Ungarn", lang="de") is expected to
# give as many rows as filtering with the English label ("Hungary") using the
# default language. The comparison only runs when both calls returned a
# non-NULL result.
dt1<-get_eurostat_data("agr_r_milkpr",filters=c("BE$","Ungarn"),lang="de",date_filter="2007-06<", keep_flags=TRUE)
dt2<-get_eurostat_data("agr_r_milkpr",filters=c("BE$","Hungary"),date_filter="2007-06<", keep_flags=TRUE)
if (!is.null(dt1) && !is.null(dt2)){
  expect_equal(nrow(dt1),nrow(dt2)) #a83
}

##################################
# clean up #
Expand Down
6 changes: 5 additions & 1 deletion man/get_eurostat_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/get_eurostat_toc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/search_eurostat_toc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9a78022

Please sign in to comment.