-
Notifications
You must be signed in to change notification settings - Fork 7
Open
Description
I'm trying to save a DataFrame to CSV.
The generated file contains only the header, and the run produced the error described in the title of this issue.
Steps:
# Reproduction script: read a MongoDB collection through Spark, flatten it
# with a SQL query, and write the result chunk by chunk to a CSV file.

# Install the packages used below ----
install.packages("sparklyr")
install.packages("dplyr")
install.packages(
  "chunked",
  repos = c("https://cran.rstudio.com", "http://edwindj.github.io/drat")
)

library(sparklyr)
library(dplyr)
library(config)
library(DBI)
library(chunked)
library(crassy)

# Spark configuration ----
# Fixed executor sizing; dynamic allocation is explicitly switched off.
conf <- spark_config()
conf$spark.executor.memoryOverhead <- "2g"
conf$spark.executor.memory <- "4g"
conf$spark.executor.cores <- 2
conf$spark.executor.instances <- 4
#conf$spark.shuffle.service.enabled <- TRUE
#conf$spark.dynamicAllocation.enabled <- TRUE
conf$spark.dynamicAllocation.enabled <- FALSE
# Connector packages requested for the session (Cassandra, MongoDB, CSV).
conf$sparklyr.defaultPackages <- c(
  "com.datastax.spark:spark-cassandra-connector_2.11:2.4.1",
  "org.mongodb.spark:mongo-spark-connector_2.11:2.4.0",
  "com.databricks:spark-csv_2.11:1.3.0"
)
conf$spark.serializer <- "org.apache.spark.serializer.KryoSerializer"

# Connect to spark
sc <- spark_connect(
  master = "spark://myspark:7077",
  spark_home = "/spark/home",
  version = "2.4.0",
  config = conf
)

# Inputs and outputs ----
csv_file_path <- "/home/data/events.csv"
mongo_dbname <- "mydb"
mongo_collection <- "events"
# The query reads from the temporary table name registered further down.
sql_txt <- "SELECT id_api, cast(geometry.coordinates as string) as geo, isoDateTime FROM mongo_waze_tbl"
# Connection URI of the form mongodb://<credentials>@<host>/<db>.<collection>?...
mongo_uri <- paste0(
  "mongodb://foo:bar*@10.8.0.5/",
  mongo_dbname,
  ".",
  mongo_collection,
  "?readPreference=primaryPreferred"
)

# Build a Spark DataFrameReader for the Mongo source and load the collection.
load <- invoke(spark_get_session(sc), "read") %>%
  invoke("format", "com.mongodb.spark.sql.DefaultSource") %>%
  invoke("option", "spark.mongodb.input.uri", mongo_uri) %>%
  invoke("option", "keyspace", mongo_dbname) %>%
  invoke("option", "table", mongo_collection) %>%
  invoke("option", "header", TRUE) %>%
  invoke("load")

# Register the loaded DataFrame under the name the SQL text refers to.
# NOTE(review): this is an unexported sparklyr function; the meaning of the
# trailing `0` and `FALSE` arguments is not visible here -- confirm.
mongo_df <- sparklyr:::spark_partition_register_df(
  sc,
  load,
  "mongo_waze_tbl",
  0,
  FALSE
)

# Run the flattening query lazily, then stream it to CSV in 5000-row chunks.
mongo_flat_df <- tbl(sc, sql(sql_txt))
mongo_flat_chunked_df <- read_chunkwise(mongo_flat_df, chunk_size = 5000)
write_chunkwise(mongo_flat_chunked_df, csv_file_path)
Output:
Warning messages:
1: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
2: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
3: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
4: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
5: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
6: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
7: In FUN(X[[i]], ...) :
Unsupported type 'logical'; using default type 'string'
Metadata
Metadata
Assignees
Labels
No labels