From 44145fc856154af55ef07cddd003441bd2d68a64 Mon Sep 17 00:00:00 2001
From: akmiller01 <alex.k.miller@gmail.com>
Date: Tue, 23 Apr 2024 09:41:46 -0400
Subject: [PATCH 1/2] WDI no longer on CKAN api

---
 data_updates/R/wdi.R | 67 ++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 40 deletions(-)

diff --git a/data_updates/R/wdi.R b/data_updates/R/wdi.R
index 84b1805f8..4724f9ef2 100644
--- a/data_updates/R/wdi.R
+++ b/data_updates/R/wdi.R
@@ -28,44 +28,31 @@ con = dbConnect(drv,
 table.name = "wdi"
 table.quote = c("repo",table.name)
 
-ckan.url = "https://datacatalog.worldbank.org/api/3/action/package_show?id=90a34ea4-8a5c-11e6-ae22-56b6b64001"
-
-res = GET(ckan.url)
-if(res$status_code==200){
-  dat = content(res)
-  resources = dat$result[[1]]$resources
-  resource_names = sapply(resources,`[`,"name")
-  csv_index = resource_names == "CSV"
-  csv_resource = resources[csv_index][[1]]
-  csv_url = csv_resource$url
-  tmp.zip = tempfile(fileext = ".zip")
-  download.file(url=csv_url,destfile=tmp.zip)
-  tmp.csv = unzip(tmp.zip,files="WDIData.csv",exdir="/tmp")
-  wdi = fread(tmp.csv, header=T)
-  wdi[,V64:=NULL]
-  names(wdi)[1:4] = tolower(make.sql.names(make.names(names(wdi)[1:4])))
-
-  # Append melt
-  id.vars=c("country_name","country_code", "indicator_name", "indicator_code")
-  variable.name="year"
-  chunk.size=5000
-  num_chunks = floor(nrow(wdi)/chunk.size)
-  pb = txtProgressBar(max=num_chunks,style=3)
-  for(i in 0:num_chunks){
-    setTxtProgressBar(pb, i)
-    start_ind = 1 + (i * chunk.size)
-    end_ind = (i+1) * chunk.size
-    end_ind = min(end_ind,nrow(wdi))
-    chunk = wdi[start_ind:end_ind,]
-    chunk.m = melt(chunk,id.vars=id.vars,variable.name=variable.name)
-    rm(chunk)
-    gc()
-    dbWriteTable(con, name = table.quote, value = chunk.m, row.names = F, overwrite = (i==0), append = (i>0))
-    rm(chunk.m)
-    gc()
-  }
-  close(pb)
-  dbDisconnect(con)
-}else{
-  stop("HTTP error: ",res$status_code)
+csv_url = "https://databank.worldbank.org/data/download/WDI_CSV.zip" # Bulk WDI CSV archive, fetched directly
+tmp.zip = tempfile(fileext = ".zip")
+download.file(url=csv_url,destfile=tmp.zip)
+tmp.csv = unzip(tmp.zip,files="WDICSV.csv",exdir="/tmp") # Extract only the main data file from the archive
+wdi = fread(tmp.csv, header=T)
+names(wdi)[1:4] = tolower(make.sql.names(make.names(names(wdi)[1:4]))) # SQL-safe names for the four id columns
+
+# Melt the wide table to long format chunk by chunk, appending each chunk to the database table
+id.vars=c("country_name","country_code", "indicator_name", "indicator_code")
+variable.name="year"
+chunk.size=5000
+num_chunks = max(ceiling(nrow(wdi)/chunk.size) - 1, 0) # Zero-based index of the last chunk; floor() added a bogus extra chunk when nrow was a multiple of chunk.size
+pb = txtProgressBar(max=max(num_chunks,1),style=3) # txtProgressBar requires max > min (min defaults to 0)
+for(i in 0:num_chunks){
+  setTxtProgressBar(pb, i)
+  start_ind = 1 + (i * chunk.size)
+  end_ind = (i+1) * chunk.size
+  end_ind = min(end_ind,nrow(wdi)) # The last chunk may be shorter than chunk.size
+  chunk = wdi[start_ind:end_ind,]
+  chunk.m = melt(chunk,id.vars=id.vars,variable.name=variable.name)
+  rm(chunk)
+  gc() # Release the wide chunk before writing to keep peak memory down
+  dbWriteTable(con, name = table.quote, value = chunk.m, row.names = F, overwrite = (i==0), append = (i>0)) # First chunk replaces the table, later chunks append
+  rm(chunk.m)
+  gc()
 }
+close(pb)
+dbDisconnect(con)

From edf8d8aa9afbf437e48017f7e9be950aabc5bad4 Mon Sep 17 00:00:00 2001
From: akmiller01 <alex.k.miller@gmail.com>
Date: Tue, 23 Apr 2024 10:02:38 -0400
Subject: [PATCH 2/2] New columns for CRS April 2024

---
 data_updates/Python/download_oecd.py | 17 ++++++++++-------
 data_updates/R/load_mirrors.R        |  4 ++++
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/data_updates/Python/download_oecd.py b/data_updates/Python/download_oecd.py
index 2aac802c2..67c69d674 100644
--- a/data_updates/Python/download_oecd.py
+++ b/data_updates/Python/download_oecd.py
@@ -146,13 +146,16 @@ def download(scrape_path, download_path, output_folder_prefix):
         # Unzip
         dir_path = os.path.dirname(os.path.realpath(__file__))
         remove_null_script_path = os.path.abspath(os.path.join(dir_path, "..", "remove_null.sh"))
-        with zipfile.ZipFile(path, "r") as zip_ref:
-            zip_ref.extractall(content_directory)
-            extracted_files = zip_ref.namelist()
-            for extracted_file in extracted_files:
-                full_path_extracted_file = os.path.join(content_directory, extracted_file)
-                rm_null_cmd = [remove_null_script_path, full_path_extracted_file]
-                subprocess.run(rm_null_cmd)
+        try:
+            with zipfile.ZipFile(path, "r") as zip_ref:
+                zip_ref.extractall(content_directory)
+                extracted_files = zip_ref.namelist()
+                for extracted_file in extracted_files:
+                    full_path_extracted_file = os.path.join(content_directory, extracted_file)
+                    rm_null_cmd = [remove_null_script_path, full_path_extracted_file]
+                    subprocess.run(rm_null_cmd)
+        except zipfile.BadZipFile:
+            print("{} is not a valid zip file. Skipping...".format(name))
 
     # Finished!
     print("Finished.\t\t\t")
diff --git a/data_updates/R/load_mirrors.R b/data_updates/R/load_mirrors.R
index 0b2670029..a0eae1979 100644
--- a/data_updates/R/load_mirrors.R
+++ b/data_updates/R/load_mirrors.R
@@ -160,6 +160,7 @@ merge_crs_tables = function(file_vec){
     "integer",
     "integer",
     "text",
+    "text",
     "integer",
     "text",
     "text",
@@ -167,6 +168,7 @@ merge_crs_tables = function(file_vec){
     "integer",
     "integer",
     "text",
+    "text",
     "integer",
     "text",
     "integer",
@@ -254,6 +256,7 @@ merge_crs_tables = function(file_vec){
   names(crs_field_types) = c(
     "year"
     ,"donor_code"
+    ,"donor_iso3_code" # New Apr 2024
     ,"donor_name"
     ,"agency_code"
     ,"agency_name"
@@ -261,6 +264,7 @@ merge_crs_tables = function(file_vec){
     ,"project_number"
     ,"initial_report"
     ,"recipient_code"
+    ,"recipient_iso3_code" # New Apr 2024
     ,"recipient_name"
     ,"region_code"
     ,"region_name"