From 90a7f9e32ba1fb77eea900cd9ff6ea47ac40eac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9sz=C3=A1ros=20M=C3=A1ty=C3=A1s=20Tam=C3=A1s?= Date: Fri, 1 Mar 2024 22:22:18 +0000 Subject: [PATCH] correction for check_toc=TRUE --- DESCRIPTION | 4 ++-- NEWS.md | 4 ++++ R/clean_restatapi_cache.R | 2 +- R/get_eurostat_bulk.R | 12 ++++++------ R/get_eurostat_data.R | 6 +++--- R/get_eurostat_raw.R | 27 ++++++++++++++++----------- README.md | 2 +- inst/tinytest/test_restatapi.R | 4 ++-- 8 files changed, 35 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20c23aa..2e21d79 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: restatapi Type: Package Title: Search and Retrieve Data from Eurostat Database -Date: 2024-02-22 -Version: 0.22.7 +Date: 2024-03-01 +Version: 0.22.8 Encoding: UTF-8 Authors@R: c(person("Mátyás", "Mészáros", email = "matyas.meszaros@ec.europa.eu", role = c("aut", "cre")), person("Sebastian", "Weinand", role = "ctb")) diff --git a/NEWS.md b/NEWS.md index e7bf89c..b454c47 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# restatapi 0.22.8 + +- correction when `check_toc=TRUE` option is used + # restatapi 0.22.7 - correction for detection of cores to be able to load the package in WebR diff --git a/R/clean_restatapi_cache.R b/R/clean_restatapi_cache.R index bc3d1d9..18dc6fe 100644 --- a/R/clean_restatapi_cache.R +++ b/R/clean_restatapi_cache.R @@ -22,7 +22,7 @@ clean_restatapi_cache<-function(cache_dir=NULL,verbose=FALSE){ td<-ls(envir=restatapi::.restatapi_env) td<-td[!(td %in% c("cfg","rav","cc","dmethod"))] rm(list=td,envir=restatapi::.restatapi_env) - if (verbose){message("\nclean_restatapi_cache - All objects except from 'cfg', 'rav', 'cc' and 'dmethod' are removed from '.restatapi_env'.")} + if (verbose){message("\nclean_restatapi_cache - All objects (outside of 'cfg', 'rav', 'cc' and 'dmethod') are removed from '.restatapi_env'.")} } if (!is.null(cache_dir)){ if (dir.exists(cache_dir)){ diff --git a/R/get_eurostat_bulk.R b/R/get_eurostat_bulk.R index db098b1..e268a83 100644 --- a/R/get_eurostat_bulk.R +++ b/R/get_eurostat_bulk.R @@ -126,11 +126,11 @@ get_eurostat_bulk <- function(id, message("The TOC is missing. Could not get the download link.") tbc<-FALSE } else { - if (any(grepl(id,toc$code,ignore.case=TRUE))){ - udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)] + if (id %in% toc$code){ + udate<-toc$lastUpdate[toc$code %in% id] if (verbose) {message("get_eurostat_bulk - TOC rows: ",nrow(toc), - "\nget_eurostat_bulk - bulk url: ",toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)], - "\nget_eurostat_bulk - ndata rowcount in TOC: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])} + "\nget_eurostat_bulk - tsv bulk url from TOC: ",toc$downloadLink.tsv[toc$code %in% id], + "\nget_eurostat_bulk - ndata rowcount in TOC: ",toc$values[toc$code %in% id])} } else { message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled.")) tbc<-FALSE @@ -150,7 +150,7 @@ get_eurostat_bulk <- function(id, if ((!cache)|is.null(restat_bulk)|(update_cache)){ if (verbose) {message("get_eurostat_bulk - class of id, cache, update_cache, cache_dir, compress_file, stringsAsFactors, keep_flags, check_toc, melt, verbose:\n", class(id)," - ",class(cache)," -",class(update_cache), " - ",class(cache_dir)," - ",class(compress_file)," - ",class(stringsAsFactors)," - ",class(keep_flags), - " - ",class(check_toc)," - ",class(melt)," - ",class(verbose))} + " - ",class(check_toc)," - ",class(TRUE)," - ",class(verbose))} restat_bulk<-restatapi::get_eurostat_raw(id,"txt",cache,update_cache,cache_dir,compress_file,stringsAsFactors,keep_flags,check_toc,melt=TRUE,verbose=verbose) } } @@ -158,7 +158,7 @@ get_eurostat_bulk <- function(id, if (!is.null(restat_bulk)){ restat_bulk[] drop<-NULL - if ("freq" %in% colnames(restat_bulk)) {setnames(restat_bulk,"freq","FREQ")} + if ("freq" %in% colnames(restat_bulk)) {data.table::setnames(restat_bulk,"freq","FREQ")} if ("FREQ" %in% colnames(restat_bulk)) {drop=c("FREQ")} if ("TIME_FORMAT" %in% colnames(restat_bulk)) {drop<-c(drop,"TIME_FORMAT")} if (is.null(select_freq)){ diff --git a/R/get_eurostat_data.R b/R/get_eurostat_data.R index 4a6198e..481d65a 100644 --- a/R/get_eurostat_data.R +++ b/R/get_eurostat_data.R @@ -232,9 +232,9 @@ get_eurostat_data <- function(id, message("The TOC is missing. Could not get the download link.") tbc<-FALSE } else { - if (any(grepl(id,toc$code,ignore.case=TRUE))){ - udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)] - if (verbose) {message("get_eurostat_data - data TOC rows: ",nrow(toc),"\nbulk url: ",toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)],"\ndata rowcount: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])} + if (id %in% toc$code){ + udate<-toc$lastUpdate[toc$code %in% id] + if (verbose) {message("get_eurostat_data - data TOC rows: ",nrow(toc),"\n\tbulk url from TOC: ",toc$downloadLink.tsv[toc$code %in% id],"\n\tdata rowcount in TOC: ",toc$values[toc$code %in% id])} } else { message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled.")) tbc<-FALSE diff --git a/R/get_eurostat_raw.R b/R/get_eurostat_raw.R index 8410ea9..a10a6d5 100644 --- a/R/get_eurostat_raw.R +++ b/R/get_eurostat_raw.R @@ -132,12 +132,16 @@ get_eurostat_raw <- function(id, message("The TOC is missing. Could not get the download link.") tbc<-FALSE } else { - if (any(grepl(id,toc$code,ignore.case=TRUE))){ - udate<-toc$lastUpdate[grepl(id,toc$code,ignore.case=TRUE)] + if (id %in% toc$code){ + udate<-toc$lastUpdate[toc$code %in% id] if (mode=="txt") { - bulk_url<-toc$downloadLink.tsv[grepl(id,toc$code,ignore.case=TRUE)] + bulk_url_base<-eval(parse(text=paste0("cfg$BULK_BASE_URL$'",rav,"'$ESTAT"))) + bulk_url_end<- switch(rav,"1" = paste0("?file=data/",id,".tsv.gz"),"2"= paste0(id,"?format=TSV&compressed=true")) + bulk_url<-paste0(bulk_url_base,bulk_url_end) } else if (mode=="xml") { - bulk_url<-toc$downloadLink.sdmx[grepl(id,toc$code,ignore.case=TRUE)] + bulk_url_base<-eval(parse(text=paste0("cfg$BULK_BASE_URL$'",rav,"'$ESTAT"))) + bulk_url_end<- switch(rav,"1" = paste0("?file=data/",id,".sdmx.zip"),"2"= paste0(id,"?format=sdmx_2.1_structured&compressed=true")) + bulk_url<-paste0(bulk_url_base,bulk_url_end) } else { message("Incorrect mode:",mode,"\n It should be either 'txt' or 'xml'." ) tbc<-FALSE @@ -147,8 +151,9 @@ get_eurostat_raw <- function(id, tbc<-FALSE } if (verbose) {message("get_eurostat_raw - raws of TOC: ",nrow(toc), - "\nget_eurostat_raw - bulk url: ",bulk_url, - "\nget_eurostat_raw - data rowcount in TOC: ",toc$values[grepl(id,toc$code,ignore.case=TRUE)])} + "\nget_eurostat_raw - txt bulk url from TOC:",toc$downloadLink.tsv[toc$code %in% id], + "\nget_eurostat_raw - txt bulk url from cfg:",bulk_url, + "\nget_eurostat_raw - data rowcount in TOC: ",toc$values[toc$code %in% id])} } else { message(paste0("'",id,"' is not in the table of contents. Please check if the 'id' is correctly spelled.")) tbc<-FALSE @@ -278,14 +283,14 @@ get_eurostat_raw <- function(id, rm(raw) data.table::setnames(raw_melted,2:3,c(rname,"values")) raw_melted<-raw_melted[raw_melted$values!=":",] - if (check_toc|rav==1){ - FREQ<-gsub("MD","D",gsub('[0-9\\.\\-]',"",raw_melted$time)) - FREQ[FREQ==""]<-"A" - } + # if (check_toc|rav==1){ + # FREQ<-gsub("MD","D",gsub('[0-9\\.\\-]',"",raw_melted$time)) + # FREQ[FREQ==""]<-"A" + # } restat_raw<-data.table::as.data.table(data.table::tstrsplit(raw_melted$bdown,",",fixed=TRUE),stringsAsFactors=stringsAsFactors) data.table::setnames(restat_raw,cnames) restat_raw<-data.table::data.table(restat_raw,raw_melted[,2:3],stringsAsFactors=stringsAsFactors) - if (check_toc|rav==1) {restat_raw<-data.table::data.table(FREQ,restat_raw)} + # if (check_toc|rav==1) {restat_raw<-data.table::data.table(FREQ,restat_raw)} if (keep_flags) {restat_raw$flags<-gsub('[0-9\\.\\-\\s\\:]',"",restat_raw$values,perl=TRUE)} restat_raw$values<-gsub('^\\:$',"",restat_raw$values,perl=TRUE) restat_raw$values<-gsub('[^0-9\\.\\-\\:]',"",restat_raw$values,perl=TRUE) diff --git a/README.md b/README.md index 4d26957..68554fb 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Next to the functions the package contains a list of country codes for different options(restatapi_cores=3) get_eurostat_toc() options(restatapi_dmethod="libcurl") -get_get_eurostat_toc(mode="txt",verbose=TRUE) +get_eurostat_toc(mode="txt",verbose=TRUE) search_eurostat_toc("energie",lang="de",ignore.case=TRUE) ``` diff --git a/inst/tinytest/test_restatapi.R b/inst/tinytest/test_restatapi.R index 9f2aea9..716c965 100644 --- a/inst/tinytest/test_restatapi.R +++ b/inst/tinytest/test_restatapi.R @@ -475,7 +475,7 @@ if (grepl("\\.amzn|-aws|5.4.109+|-azure ",Sys.info()['release'])) { if (!is.null(bulk1)&!is.null(bulk2)){ kc<-colnames(bulk1) bulk1<-bulk1[,..kc] - bulk1<-bulk2[,..kc] + bulk2<-bulk2[,..kc] data.table::setorder(bulk1) data.table::setorder(bulk2) expect_true(identical(bulk1,bulk2)) # a44 @@ -584,7 +584,7 @@ if (grepl("\\.amzn|-aws|5.4.109+|-azure ",Sys.info()['release'])) { expect_equal(nrow(estat_data4),nrow(bulk2)) # a79 expect_true(nrow(raw4)>nrow(estat_data4)) # a80 } else {not_checked<-paste(not_checked,"a50-a80",sep=",")} - + if (tolower(testid1) %in% xml_toc$code) expect_true(!is.null(get_eurostat_data(testid1,update_cache=TRUE,check_toc=TRUE))) #a81 }