diff --git a/.Rbuildignore b/.Rbuildignore index 45a3fc4..1c67824 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,3 +2,4 @@ ^\.Rproj\.user$ ^docs$ ^\.github$ +^\.ipynb diff --git a/DESCRIPTION b/DESCRIPTION index 3531595..40665f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: restatapi Type: Package Title: Search and Retrieve Data from Eurostat Database -Date: 2024-03-14 -Version: 0.22.9 +Date: 2024-03-25 +Version: 0.23.0 Encoding: UTF-8 Authors@R: c(person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre")), person("Sebastian", "Weinand", role = "ctb")) diff --git a/NEWS.md b/NEWS.md index 89a0304..57bc6b2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# restatapi 0.23.0 + +- correction of `get_eurostat_toc()` function and the functions using the `check_toc` options because the content of the XML TOC has changed + # restatapi 0.22.9 - correction of caching when to DSD downloaded with different languages @@ -27,11 +31,11 @@ # restatapi 0.22.3 -- additional check in the get_eurostat_dsd() and get_eurostat_codelist() for failing writing data to disk because of failing network connection +- additional check in the `get_eurostat_dsd()` and `get_eurostat_codelist()` for failing writing data to disk because of failing network connection # restatapi 0.22.2 -- correcting the get_compressed_sdmx() function not closing connections +- correcting the `get_compressed_sdmx()` function not closing connections - updating examples and tests # restatapi 0.22.1 diff --git a/R/extract_data.R b/R/extract_data.R index 84b61b8..c995ec1 100644 --- a/R/extract_data.R +++ b/R/extract_data.R @@ -45,7 +45,7 @@ extract_data<-function(xml_lf,keep_flags=FALSE,stringsAsFactors=FALSE,bulk=TRUE, dv<-xml2::xml_attrs(xml2::xml_children(xml_lf)) if (keep_flags){ flagc<-switch(rav,"1"="OBS_STATUS","2"="OBS_FLAG") - if (check_toc) {flagc<-"OBS_STATUS"} + # if (check_toc) {flagc<-"OBS_STATUS"} cn<-c("TIME_PERIOD","OBS_VALUE",flagc) } else { 
cn<-c("TIME_PERIOD","OBS_VALUE") diff --git a/R/get_eurostat_raw.R b/R/get_eurostat_raw.R index a10a6d5..0158b19 100644 --- a/R/get_eurostat_raw.R +++ b/R/get_eurostat_raw.R @@ -307,12 +307,12 @@ get_eurostat_raw <- function(id, # } } else if (mode=="xml"){ format<-switch(rav, "1" = "zip", "2" = "gz") - if (check_toc) {format<-"zip"} + # if (check_toc) {format<-"zip"} if (verbose) {message("get_eurostat_raw - file format: ",format)} sdmx_file<-restatapi::get_compressed_sdmx(bulk_url,verbose=verbose,format=format) if(!is.null(sdmx_file)){ xml_mark<-switch(rav, "1" = ".//data:Series", "2" = ".//Series") - if (check_toc) {xml_mark<-".//data:Series"} + # if (check_toc) {xml_mark<-".//data:Series"} xml_leafs<-xml2::xml_find_all(sdmx_file,xml_mark) if (verbose) {message("get_eurostat_raw - class(xml_leafs): ",class(xml_leafs), "\nget_eurostat_raw - number of nodes: ",length(xml_leafs), @@ -320,17 +320,17 @@ get_eurostat_raw <- function(id, if (Sys.info()[['sysname']]=='Windows'){ if (getOption("restatapi_cores",1L)==1) { if (verbose) message("No parallel") - restat_raw<-data.table::rbindlist(lapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc)) + restat_raw<-data.table::rbindlist(lapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors)) } else { xml_leafs<-as.character(xml_leafs) cl<-parallel::makeCluster(getOption("restatapi_cores",1L)) parallel::clusterEvalQ(cl,require(xml2)) parallel::clusterExport(cl,c("extract_data")) - restat_raw<-data.table::rbindlist(parallel::parLapply(cl,xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc)) + restat_raw<-data.table::rbindlist(parallel::parLapply(cl,xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors)) parallel::stopCluster(cl) } }else{ - 
restat_raw<-data.table::rbindlist(parallel::mclapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,check_toc=check_toc,mc.cores=getOption("restatapi_cores",1L))) + restat_raw<-data.table::rbindlist(parallel::mclapply(xml_leafs,extract_data,keep_flags=keep_flags,stringsAsFactors=stringsAsFactors,mc.cores=getOption("restatapi_cores",1L))) } } else{ message("Could not download the SDMX file, use the verbose option to see the exact cause of the error.") diff --git a/R/get_eurostat_toc.R b/R/get_eurostat_toc.R index 8d22656..3fa5bd8 100644 --- a/R/get_eurostat_toc.R +++ b/R/get_eurostat_toc.R @@ -30,9 +30,7 @@ #' \code{metadata.sdmx}\tab The link to the metadata in SDMX format, and this column exists only if the #' download \code{mode} is "xml"\cr #' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk -#' download facility and this column exists only if the download \code{mode} is "xml"\cr -#' \code{downloadLink.sdmx}\tab The link to the whole dataset/table in SDMX format in the bulk download -#' facility and this column exists only if the download \code{mode} is "xml" +#' download facility and this column exists only if the download \code{mode} is "xml" #' } #' @export #' @seealso \code{\link{search_eurostat_toc}}, \code{\link{get_eurostat_dsd}}, \code{\link{get_eurostat_raw}}, \code{\link{get_eurostat_bulk}}, \code{\link{get_eurostat_data}}. @@ -65,6 +63,7 @@ get_eurostat_toc<-function(mode="xml", verbose=FALSE,...) 
{ toc<-xml_leafs<-NULL tbc<-TRUE + verbose<-verbose|getOption("restatapi_verbose",FALSE) if (verbose) {message("\nget_eurostat_toc - API version:",get("rav",envir=restatapi::.restatapi_env)," - number of cores:",getOption("restatapi_cores",1L))} if((!exists(".restatapi_env")|(length(list(...))>0))){ if ((length(list(...))>0)) { @@ -81,7 +80,7 @@ get_eurostat_toc<-function(mode="xml", update_cache<-update_cache|getOption("restatapi_update",FALSE) dmethod<-getOption("restatapi_dmethod",get("dmethod",envir=restatapi::.restatapi_env)) if(any(grepl("get_eurostat_bulk|get_eurostat_data|get_eurostat_raw",as.character(sys.calls()),perl=TRUE))) {update_cache<-FALSE} - verbose<-verbose|getOption("restatapi_verbose",FALSE) + if ((cache) & (!update_cache)) { toc<-restatapi::get_eurostat_cache(paste0("toc.",mode,".",lang),cache_dir,verbose=verbose) } @@ -177,9 +176,9 @@ get_eurostat_toc<-function(mode="xml", type<-as.character(unlist(lapply(xml_leafs,xml2::xml_attr,attr="type"))) toc<-cbind(toc,type) # names(toc)<-c(sub("\\.$","",paste(xml2::xml_name(xml2::xml_children(xml_leafs[1])),sub(".*)","",as.character(xml2::xml_attrs(xml2::xml_children(xml_leafs[1])))),sep="."),perl=TRUE),"type") - keep<-c(paste0("title.",lang),"code","type","lastUpdate","lastModified","dataStart","dataEnd","values",paste0("unit.",lang),paste0("shortDescription.",lang),"metadata.html","metadata.sdmx","downloadLink.tsv","downloadLink.sdmx") + keep<-c(paste0("title.",lang),"code","type","lastUpdate","lastModified","dataStart","dataEnd","values",paste0("unit.",lang),paste0("shortDescription.",lang),"metadata.html","metadata.sdmx","downloadLink.tsv") toc<-toc[,keep,with=FALSE] - names(toc)<-c("title","code","type","lastUpdate","lastModified","dataStart","dataEnd","values","unit","shortDescription","metadata.html","metadata.sdmx","downloadLink.tsv","downloadLink.sdmx") + 
names(toc)<-c("title","code","type","lastUpdate","lastModified","dataStart","dataEnd","values","unit","shortDescription","metadata.html","metadata.sdmx","downloadLink.tsv") } } } diff --git a/R/search_eurostat_toc.R b/R/search_eurostat_toc.R index 712501c..7693d6e 100644 --- a/R/search_eurostat_toc.R +++ b/R/search_eurostat_toc.R @@ -25,8 +25,7 @@ #' \code{shortDescription}\tab The short description of the values for tables in the language provided by the \code{lang} parameterif the \code{type} 'dataset' this column is empty\cr #' \code{metadata.html}\tab The link to the metadata in html format\cr #' \code{metadata.sdmx}\tab The link to the metadata in SDMX format\cr -#' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk download facility \cr -#' \code{downloadLink.sdmx}\tab The link to the whole dataset/table in SDMX format in the bulk download facility +#' \code{downloadLink.tsv}\tab The link to the whole dataset/table in tab separated values format in the bulk download facility #' } #' The value in the \code{code} column can be used as an id in the \code{\link{get_eurostat_data}}, \code{\link{get_eurostat_bulk}}, \code{\link{get_eurostat_raw}} and \code{\link{get_eurostat_dsd}} functions. #' If there is no hit for the search query, it returns \code{NULL}. 
diff --git a/inst/tinytest/test_restatapi.R b/inst/tinytest/test_restatapi.R index cac50dd..1ea2fe8 100644 --- a/inst/tinytest/test_restatapi.R +++ b/inst/tinytest/test_restatapi.R @@ -48,7 +48,7 @@ txt_toc<-get_eurostat_toc(mode="txt") t2<-system.time({get_eurostat_toc()})[3] expect_warning(get_eurostat_toc(mode="text")) # 1 if (!is.null(xml_toc)){ - expect_equal(ncol(xml_toc),14) # 2 + expect_equal(ncol(xml_toc),13) # 2 expect_true(exists("toc.xml.en",envir=restatapi::.restatapi_env)) # 3 if (!is.null(txt_toc)){ expect_equal(ncol(txt_toc),8) # 4 @@ -110,19 +110,23 @@ if (!is.null(dt1)&is.data.frame(dt1)&!is.null(dt2)&is.data.frame(dt2)){ } else {not_checked<-paste(not_checked,"18-21",sep=",")} if (!is.null(xml_toc)){ + testid3<-xml_toc$code[xml_toc$values==min(xml_toc$values)][1] + if (!is.na(testid3)){ + expect_equal(nrow(get_eurostat_raw(testid3,verbose=FALSE)),min(xml_toc$values)) # 22 + expect_equal(nrow(get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)),min(xml_toc$values)) # 23 + expect_message(bt1<-get_eurostat_bulk("blabla",check_toc=TRUE,verbose=FALSE)) # 24 + expect_equal(bt1,NULL) # 25 + expect_equal(nrow(get_eurostat_data(testid3,verbose=FALSE)),min(xml_toc$values)) # 26 + } else {not_checked<-paste(not_checked,"22-26",sep=",")} testid3<-xml_toc$code[is.na(xml_toc$values)&is.na(xml_toc$lastUpdate)&is.na(xml_toc$downloadLink.tsv)][1] # testid3<-xml_toc$code[(xml_toc$shortDescription=="")&is.na(xml_toc$metadata.html)&is.na(xml_toc$metadata.sdmx)][1] if (!is.na(testid3)){ - expect_message(rt1<-get_eurostat_raw(testid3,verbose=FALSE)) # 22 - expect_equal(rt1,NULL) # 23 - expect_message(rt2<-get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)) # 24 - expect_equal(rt2,NULL) # 25 - expect_message(bt1<-get_eurostat_bulk("blabla",check_toc=TRUE,verbose=FALSE)) # 26 - expect_equal(bt1,NULL) # 27 - expect_message(dt3<-get_eurostat_data(testid3,verbose=FALSE)) # 28 - expect_equal(dt3,NULL) # 29 - } + 
expect_message(rt1<-get_eurostat_raw(testid3,verbose=FALSE)) # 27 + expect_message(rt2<-get_eurostat_raw(testid3,check_toc=TRUE,verbose=FALSE)) # 28 + expect_message(dt3<-get_eurostat_data(testid3,verbose=FALSE)) # 29 + } else {not_checked<-paste(not_checked,"27-29",sep=",")} } else {not_checked<-paste(not_checked,"22-29",sep=",")} + rt3<-get_eurostat_raw(testid4,mode="xml",stringsAsFactors=TRUE,keep_flags=TRUE) bt2<-get_eurostat_data(testid4,keep_flags=TRUE,stringsAsFactors=FALSE) dt4<-get_eurostat_data(testid4,date_filter=2008,keep_flags=TRUE,stringsAsFactors=FALSE) @@ -145,7 +149,7 @@ if (!is.null(bt3)&!is.null(bt4)){ } else {not_checked<-paste(not_checked,"32",sep=",")} if (!is.null(rt4)&!is.null(rt5)){ expect_true(nrow(rt4)==nrow(rt5)) # 33 - expect_true(ncol(rt4)+2==ncol(rt5)) # 34 + expect_true(ncol(rt4)+1==ncol(rt5)) # 34 } else {not_checked<-paste(not_checked,"33-34",sep=",")} #### test of filtering in the get_eurostat_data function