I have used the RJDBC and DBI libraries in the past with the RPostgres::Postgres() driver without problems, but that driver requires authentication by user ID and password. Recently all the data of interest migrated to Andes 3.0 and is available only with IAM authentication (access_key_id, secret_access_key, session_token). Your driver seems to be the only one that accepts this type of authentication, so I am now trying to use it as in the script below. It works for a few minutes (I can display the head of a table, for example), but the R session invariably ends up crashing with a "*** caught segfault *** address (nil), cause 'memory not mapped'" error.
I run the script on a cloud desktop (c6a.4xlarge) running Amazon Linux 2 (x86_64, kernel 5.10) with R 4.4.2 and the following package versions:
RJDBC_0.2-10, rJava_1.0-11, DBI_1.2.3.
Let me know if you need more details.
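One more detail that may be relevant: since rJava crashes are sometimes memory-related, I can also test with a larger JVM heap. Here is a sketch of how I would set it (the -Xmx value is arbitrary, and I have not verified that it changes anything); the option must be set before the JVM starts, i.e. before RJDBC/rJava are loaded:

options(java.parameters = "-Xmx8g") #must run before library(RJDBC)/library(rJava)
library(RJDBC)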
### MAIN ###
#Code to connect to Andes with JDBC#
#(See https://docs.aws.amazon.com/redshift/latest/mgmt/jdbc20-install.html for more info)
#General libraries
library(tidyverse)
library(magrittr)
library(glue) #needed for glue() below; tidyverse does not attach glue
#Specific libraries for connecting to DBs
#library(rJava)
library(RJDBC)
library(DBI)
fname_driver_jdbc <- "redshift-jdbc42-2.1.0.32.jar" #downloaded from https://docs.aws.amazon.com/redshift/latest/mgmt/jdbc20-download-driver.html
path_driver <- "/home/myself/R/drivers/"
fpath_driver_jdbc <- glue("{path_driver}{fname_driver_jdbc}")
driver_0 <- #From RJDBC library:
  JDBC(driverClass = "com.amazon.redshift.Driver",
       classPath = Sys.glob(fpath_driver_jdbc),
       identifier.quote = "`")
region_0 <- "us-east-1"
db_name_0 <- "XXXX_data_prod"
host_0 <- "XXXX-data-redshift.abcdefghijklmnop-east-1.redshift.amazonaws.com"
port_0 <- 5438
clust_id_0 <- "XXXX-data-redshift"
url_0 <- glue("jdbc:redshift:iam://{host_0}")
profil_IDP <- "XXXX-data-blah"
##From `ada profile list`:
#Profile: XXXX-data-blah
#Provider: conduit
#Account: XXXXXXXXXXXX
#Role: IibsAdminAccess-DO-NOT-DELETE
#Partition: aws
dttm_exp <- fun_update_cred2(profil_IDP) #run the appendix code first
conn_andes <- #From DBI library
  dbConnect(drv = driver_0,
            url = url_0,
            DBNAME = db_name_0,
            PORT = port_0,
            Region = region_0,
            ClusterID = clust_id_0,
            groupFederation = TRUE,
            AccessKeyID = Sys.getenv("AWS_ACCESS_KEY_ID"),
            SecretAccessKey = Sys.getenv("AWS_SECRET_ACCESS_KEY"),
            SessionToken = Sys.getenv("AWS_SESSION_TOKEN"))
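#Sanity check (illustrative, not in my original script): a trivial query
#succeeds immediately after connecting.
dbGetQuery(conn_andes, "SELECT 1")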
catalog_blah <- "XXXXX"
schema_blah <- "XXXXXX"
table_blah <- "xxxxxxxx"
db_blah <- tbl(conn_andes, dbplyr::in_catalog(catalog_blah, schema_blah, table_blah)) #in_catalog() comes from dbplyr, which tidyverse does not attach
db_blah %>% head
#It works! But when the same code runs inside a longer script, it crashes after a little while.
#The error "*** caught segfault *** address (nil), cause 'memory not mapped'" hints at a memory leak.
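#For reproduction: my longer script essentially repeats queries like the sketch
#below (placeholder loop; timings approximate), and the segfault occurs during
#one of these iterations after a few minutes.
for (i in 1:50) {
  print(db_blah %>% head() %>% collect()) #collect() forces execution on the database
  Sys.sleep(30)
}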
### APPENDIX ###
#Code for updating credentials#
#Retrieves access_key_id, secret_access_key, and session_token from the ada CLI.
fun_aws_cred2 <- function(profil = profil_default) {
  cmd_ada <- glue("ada credentials print --profile {profil}")
  cred_json <- system(cmd_ada, intern = TRUE) #one character element per output line
  list_cred <- jsonlite::fromJSON(paste(cred_json, collapse = "")) #collapse in case the JSON is pretty-printed over several lines
  dttm_exp <- lubridate::as_datetime(list_cred$Expiration) #UTC (default)
  return(list(aws_key_id = list_cred$AccessKeyId,
              aws_key_secret = list_cred$SecretAccessKey,
              aws_sess_token = list_cred$SessionToken,
              aws_dttm_exp = dttm_exp))
}
#Saves the access key, secret key, and session token into environment variables
#and returns the credential expiration datetime.
fun_update_cred2 <- function(profil = profil_default) {
  aws_region <- fun_aws_region(profil) #retrieves the region from ~/.aws/config (usually us-east-1)
  aws_cred <- fun_aws_cred2(profil)
  #Use [[ ]] rather than [ ]: single brackets return one-element lists, and
  #Sys.setenv() would deparse those instead of setting the plain string values.
  Sys.setenv("AWS_DEFAULT_REGION" = aws_region,
             "AWS_ACCESS_KEY_ID" = aws_cred[["aws_key_id"]],
             "AWS_SECRET_ACCESS_KEY" = aws_cred[["aws_key_secret"]],
             "AWS_SESSION_TOKEN" = aws_cred[["aws_sess_token"]])
  return(aws_cred[["aws_dttm_exp"]]) #already a POSIXct datetime; no numeric round-trip needed
}
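#Workaround I am experimenting with (a sketch only; I have not verified that it
#prevents the crash): refresh the credentials and reconnect shortly before they
#expire, using the expiration datetime returned by fun_update_cred2().
if (Sys.time() > dttm_exp - 5*60) { #refresh 5 minutes early (arbitrary margin)
  DBI::dbDisconnect(conn_andes) #drop the connection holding the old token
  dttm_exp <- fun_update_cred2(profil_IDP) #re-export fresh credentials
  conn_andes <- dbConnect(drv = driver_0,
                          url = url_0,
                          DBNAME = db_name_0,
                          PORT = port_0,
                          Region = region_0,
                          ClusterID = clust_id_0,
                          groupFederation = TRUE,
                          AccessKeyID = Sys.getenv("AWS_ACCESS_KEY_ID"),
                          SecretAccessKey = Sys.getenv("AWS_SECRET_ACCESS_KEY"),
                          SessionToken = Sys.getenv("AWS_SESSION_TOKEN"))
}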