-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathterasort.sh
65 lines (51 loc) · 1.67 KB
/
terasort.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/bin/bash
#
# terasort.sh - run the Hadoop TeraSort example against an existing input
# directory and append the job output, together with its timing report, to a
# timestamped file under ./logs.
#
# Optional environment overrides (defaults match the original hard-coded values):
#   SIZE             dataset label embedded in the HDFS input/output paths
#                    (default 1T; 500G, 100G, 10G and 1G have also been used)
#   MR_EXAMPLES_JAR  path to the hadoop-mapreduce-examples jar
#
# WARNING: before sorting, this script kills every job reported by
# `mapred job -list` and deletes the previous output directory (skipping trash).

# Ignore SIGHUP so the run survives the launching terminal hanging up.
trap '' HUP

#if [ $EUID -eq 0 ]; then
# echo "this script must not be run as root. su to hdfs user to run"
# exit 1
#fi

MR_EXAMPLES_JAR=${MR_EXAMPLES_JAR:-/opt/cloudera/parcels/CDH/jars/hadoop-mapreduce-examples-3.1.1.7.2.2.2-1.jar}
SIZE=${SIZE:-1T}

LOGDIR=logs
# -p makes this a no-op when the directory already exists. Stop here if it
# cannot be created: otherwise the destructive steps below would still run
# and the sort itself would then fail on the log redirection.
mkdir -p "./$LOGDIR" || {
  echo "cannot create log directory ./$LOGDIR" >&2
  exit 1
}

DATE=$(date +%Y-%m-%d:%H:%M:%S)
RESULTSFILE="./$LOGDIR/terasort_results_$DATE"

# Alternative locations at the HDFS root, kept for reference:
#INPUT=/${SIZE}-terasort-input
#OUTPUT=/${SIZE}-terasort-output
INPUT=/user/sunilemanjee/data/sandbox/poc/teragen/${SIZE}-terasort-input
OUTPUT=/user/sunilemanjee/data/sandbox/poc/teragen/${SIZE}-terasort-output

# Kill any running MapReduce jobs: for each listing line containing "job_",
# the first field (the job id) is handed to `mapred job -kill`.
mapred job -list | awk '/job_/ { system("mapred job -kill " $1) }'

# Delete the output directory left by a previous run.
hadoop fs -rm -r -f -skipTrash "$OUTPUT"

# Run terasort.
# The braces are required: `time` is a shell keyword whose report is written
# to the shell's own stderr, so a redirection attached to the hadoop command
# alone would leave the timing out of the results file.
# mapreduce.job.reduces is the current name of the deprecated
# mapred.reduce.tasks property.
{
  time hadoop jar "$MR_EXAMPLES_JAR" terasort \
    -Dmapreduce.map.log.level=INFO \
    -Dmapreduce.reduce.log.level=INFO \
    -Dyarn.app.mapreduce.am.log.level=INFO \
    -Dio.file.buffer.size=131072 \
    -Dmapreduce.map.cpu.vcores=1 \
    -Dmapreduce.map.java.opts=-Xmx1536m \
    -Dmapreduce.map.maxattempts=1 \
    -Dmapreduce.map.memory.mb=2048 \
    -Dmapreduce.map.output.compress=true \
    -Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \
    -Dmapreduce.reduce.cpu.vcores=1 \
    -Dmapreduce.reduce.java.opts=-Xmx1536m \
    -Dmapreduce.reduce.maxattempts=1 \
    -Dmapreduce.reduce.memory.mb=2048 \
    -Dmapreduce.task.io.sort.factor=300 \
    -Dmapreduce.task.io.sort.mb=384 \
    -Dyarn.app.mapreduce.am.command.opts=-Xmx768m \
    -Dyarn.app.mapreduce.am.resource.mb=1024 \
    -Dmapreduce.job.reduces=92 \
    -Dmapreduce.terasort.output.replication=1 \
    "$INPUT" "$OUTPUT"
} >> "$RESULTSFILE" 2>&1