pip install livyc
from livyc import livyc
# Settings handed to the LivyC client: the Livy server address/port and the
# extra jar coordinates for the session (here the PostgreSQL JDBC driver).
data_livy = {
    "livy_server_url": "localhost",
    "port": "8998",
    "jars": ["org.postgresql:postgresql:42.3.1"],
}

# Values substituted into the {placeholders} of the PySpark script below.
params = {
    "host": "localhost",
    "port": "5432",
    "database": "db",
    "table": "staging",
    "user": "postgres",
    "password": "pg12345",
}

# PySpark code submitted to the session: it loads one table over JDBC and
# stores its row count in `n_rows`, then stops Spark.
# NOTE: this is a regular (non-raw) string, so every backslash-newline pair is
# dropped by Python and the `.option(...)` chain reaches the session as a
# single line.
# NOTE(review): `spark` is never defined in the script; presumably the Livy
# session provides it - confirm.
pyspark_script = """
from pyspark.sql.functions import udf, col, explode
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, ArrayType
from pyspark.sql import Row
from pyspark.sql import SparkSession
df = spark.read.format("jdbc") \
.option("url", "jdbc:postgresql://{host}:{port}/{database}") \
.option("driver", "org.postgresql.Driver") \
.option("dbtable", "{table}") \
.option("user", "{user}") \
.option("password", "{password}") \
.load()
n_rows = df.count()
spark.stop()
"""

# Create the client and open a session with the settings above.
lvy = livyc.LivyC(data_livy)
session = lvy.create_session()

# Fill in the connection placeholders, then run the script in the session.
rendered_script = pyspark_script.format(**params)
lvy.run_script(session, rendered_script)

# Read back the `n_rows` variable that the script assigned.
lvy.read_variable(session, "n_rows")
Any ideas or feedback about this repository? Help me improve it.
- Created by Ramses Alexander Coraspe Valdez
- Created in 2022
This project is licensed under the terms of the MIT License.