-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspark-submit-scala-apply-test.sh
63 lines (55 loc) · 2.61 KB
/
spark-submit-scala-apply-test.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
# macos version
# run test spark job on localhost, scala-apply performance testing
[ -z "${JAVA_HOME}" ] && export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
java_version=$("${JAVA_HOME}/bin/java" -version 2>&1 | awk -F '"' '/version/ {print substr($2,0,3)}')
if [ "$java_version" != "1.8" ]; then @echo "java 8 needed"; fi
export PATH=${JAVA_HOME}/bin:$PATH
[ -z "${SPARK_HOME}" ] && export SPARK_HOME=${HOME}/.sparkenv/spark-2.4.3-bin-hadoop2.7
export SPARK_LOCAL_HOSTNAME=localhost
export SPARK_MASTER_HOST=localhost
export SPARK_WORKER_INSTANCES=2
export SPARK_MASTER_OPTS="-Dspark.deploy.defaultCores=4"
unset UBER_JAR
[ -z "${UBER_JAR}" ] && export UBER_JAR=$(find "../target/scala-2.11" -name '*-transformers-assembly-*.jar')
export SPARK_JARS=${UBER_JAR}
export JAVA_OPTS="-XX:MaxMetaspaceSize=1G -Xmx10G"
export extraJavaOptions="-XX:+UseCompressedOops -XX:hashCode=0"
# -XX:+PrintCompilation -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining
# see src/test/scripts/run-spark-standalone.sh
#export clusterMode='--master spark://localhost:7077 --deploy-mode cluster'
export clusterMode='--master local[4] --deploy-mode client'
mkdir /tmp/spark-events
time "${SPARK_HOME}/bin/spark-submit" --verbose \
--name "test-scala-apply" \
--class "transformers.ApplyModelsJob" \
--queue "default" \
${clusterMode} \
--total-executor-cores 4 --num-executors 4 --executor-cores 1 --executor-memory 1G --driver-memory 2G \
--conf spark.executor.memoryOverhead=2G \
--conf spark.sql.shuffle.partitions=16 \
--conf spark.hadoop.dfs.replication=1 \
--conf spark.speculation=false \
--conf spark.dynamicAllocation.enabled=false \
--conf spark.dynamicAllocation.maxExecutors=4 \
--conf spark.dynamicAllocation.minExecutors=4 \
--conf spark.sql.parquet.compression.codec=gzip \
--conf spark.hadoop.zlib.compress.level=BEST_COMPRESSION \
--conf spark.eventLog.enabled=true \
--conf spark.driver.maxResultSize=2g \
--conf spark.broadcast.blockSize=4m \
--conf "spark.executor.extraJavaOptions=${extraJavaOptions}" \
--conf "spark.driver.extraJavaOptions=${extraJavaOptions}" \
--conf spark.checkpoint.dir=hdfs:///tmp \
--conf spark.sql.sources.partitionColumnTypeInference.enabled=false \
--conf spark.yarn.maxAppAttempts=1 \
"${UBER_JAR}" \
featuresIndex=/tmp/grouped_features.json \
featuresDataset=/tmp/features.gz.parquet \
numPartitions=4 \
explodeFactor=5000 \
repeat=2 \
scalameter=off
#--conf spark.memory.offHeap.enabled=true \
#--conf spark.memory.offHeap.size=1000000000 \
# --jars /etc/hive/conf/hive-site.xml \