Skip to content

Commit

Permalink
Merge pull request #1066 from devinit/feature/oecd-apr24-update
Browse files Browse the repository at this point in the history
Feature/oecd apr24 update
  • Loading branch information
akmiller01 committed Apr 23, 2024
2 parents 025f31f + edf8d8a commit 6add3f2
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 47 deletions.
17 changes: 10 additions & 7 deletions data_updates/Python/download_oecd.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,16 @@ def download(scrape_path, download_path, output_folder_prefix):
# Unzip
dir_path = os.path.dirname(os.path.realpath(__file__))
remove_null_script_path = os.path.abspath(os.path.join(dir_path, "..", "remove_null.sh"))
with zipfile.ZipFile(path, "r") as zip_ref:
zip_ref.extractall(content_directory)
extracted_files = zip_ref.namelist()
for extracted_file in extracted_files:
full_path_extracted_file = os.path.join(content_directory, extracted_file)
rm_null_cmd = [remove_null_script_path, full_path_extracted_file]
subprocess.run(rm_null_cmd)
try:
with zipfile.ZipFile(path, "r") as zip_ref:
zip_ref.extractall(content_directory)
extracted_files = zip_ref.namelist()
for extracted_file in extracted_files:
full_path_extracted_file = os.path.join(content_directory, extracted_file)
rm_null_cmd = [remove_null_script_path, full_path_extracted_file]
subprocess.run(rm_null_cmd)
except zipfile.BadZipFile:
print("{} is not a valid zip file. Skipping...".format(name))

# Finished!
print("Finished.\t\t\t")
Expand Down
4 changes: 4 additions & 0 deletions data_updates/R/load_mirrors.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,15 @@ merge_crs_tables = function(file_vec){
"integer",
"integer",
"text",
"text",
"integer",
"text",
"text",
"text",
"integer",
"integer",
"text",
"text",
"integer",
"text",
"integer",
Expand Down Expand Up @@ -254,13 +256,15 @@ merge_crs_tables = function(file_vec){
names(crs_field_types) = c(
"year"
,"donor_code"
,"donor_iso3_code" # New Apr 2024
,"donor_name"
,"agency_code"
,"agency_name"
,"crs_id"
,"project_number"
,"initial_report"
,"recipient_code"
,"recipient_iso3_code" # New Apr 2024
,"recipient_name"
,"region_code"
,"region_name"
Expand Down
67 changes: 27 additions & 40 deletions data_updates/R/wdi.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,44 +28,31 @@ con = dbConnect(drv,
table.name = "wdi"
table.quote = c("repo",table.name)

ckan.url = "https://datacatalog.worldbank.org/api/3/action/package_show?id=90a34ea4-8a5c-11e6-ae22-56b6b64001"

res = GET(ckan.url)
if(res$status_code==200){
dat = content(res)
resources = dat$result[[1]]$resources
resource_names = sapply(resources,`[`,"name")
csv_index = resource_names == "CSV"
csv_resource = resources[csv_index][[1]]
csv_url = csv_resource$url
tmp.zip = tempfile(fileext = ".zip")
download.file(url=csv_url,destfile=tmp.zip)
tmp.csv = unzip(tmp.zip,files="WDIData.csv",exdir="/tmp")
wdi = fread(tmp.csv, header=T)
wdi[,V64:=NULL]
names(wdi)[1:4] = tolower(make.sql.names(make.names(names(wdi)[1:4])))

# Append melt
id.vars=c("country_name","country_code", "indicator_name", "indicator_code")
variable.name="year"
chunk.size=5000
num_chunks = floor(nrow(wdi)/chunk.size)
pb = txtProgressBar(max=num_chunks,style=3)
for(i in 0:num_chunks){
setTxtProgressBar(pb, i)
start_ind = 1 + (i * chunk.size)
end_ind = (i+1) * chunk.size
end_ind = min(end_ind,nrow(wdi))
chunk = wdi[start_ind:end_ind,]
chunk.m = melt(chunk,id.vars=id.vars,variable.name=variable.name)
rm(chunk)
gc()
dbWriteTable(con, name = table.quote, value = chunk.m, row.names = F, overwrite = (i==0), append = (i>0))
rm(chunk.m)
gc()
}
close(pb)
dbDisconnect(con)
}else{
stop("HTTP error: ",res$status_code)
# Download the World Bank WDI bulk CSV archive and load the main data file.
# The download URL and the name of the CSV inside the archive are hard-coded.
csv_url = "https://databank.worldbank.org/data/download/WDI_CSV.zip"
tmp.zip = tempfile(fileext = ".zip")
download.file(url=csv_url,destfile=tmp.zip)
tmp.csv = unzip(tmp.zip,files="WDICSV.csv",exdir="/tmp")
wdi = fread(tmp.csv, header=TRUE)
# Normalise the first four (identifier) column names into SQL-safe lower case;
# the remaining columns are melted into a single "year" column below.
names(wdi)[1:4] = tolower(make.sql.names(make.names(names(wdi)[1:4])))

# Append melt
# The wide table is melted to long format and written to the database in
# fixed-size row chunks (each chunk is melted separately and freed after
# writing, rather than melting the whole table in one call).
id.vars=c("country_name","country_code", "indicator_name", "indicator_code")
variable.name="year"
chunk.size=5000
n_rows = nrow(wdi)
# ceiling() yields exactly the number of non-empty chunks. The previous
# floor() + 0:num_chunks form ran one extra iteration when n_rows was an exact
# multiple of chunk.size; that iteration indexed (n_rows+1):n_rows, which R
# evaluates as a descending sequence, writing an all-NA row plus a duplicate
# of the last row.
num_chunks = ceiling(n_rows/chunk.size)
# Guard: txtProgressBar() requires max > min, and there is nothing to write
# when the table is empty (the existing database table is left untouched).
if(num_chunks > 0){
  pb = txtProgressBar(max=num_chunks,style=3)
  for(i in seq_len(num_chunks)){
    start_ind = 1 + ((i-1) * chunk.size)
    end_ind = min(i * chunk.size, n_rows)
    chunk = wdi[start_ind:end_ind,]
    chunk.m = melt(chunk,id.vars=id.vars,variable.name=variable.name)
    rm(chunk)
    gc()
    # The first chunk replaces the table; later chunks are appended to it.
    dbWriteTable(con, name = table.quote, value = chunk.m, row.names = FALSE, overwrite = (i==1), append = (i>1))
    rm(chunk.m)
    gc()
    setTxtProgressBar(pb, i)
  }
  close(pb)
}
dbDisconnect(con)

0 comments on commit 6add3f2

Please sign in to comment.