-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathteragen.sh
75 lines (56 loc) · 1.77 KB
/
teragen.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
# An empty trap action makes the shell ignore SIGHUP.
# Presumably this lets a long run outlive the terminal that started it.
trap '' HUP
#if [ $EUID -eq 0 ]; then
# echo "this script must not be run as root. su to hdfs user to run"
# exit 1
#fi
# Jar that is handed to `hadoop jar` to run the teragen program.
# Earlier HDP location, kept for reference:
#MR_EXAMPLES_JAR=/usr/hdp/2.2.0.0-2041/hadoop-mapreduce/hadoop-mapreduce-examples.jar
MR_EXAMPLES_JAR='/opt/cloudera/parcels/CDH/jars/hadoop-mapreduce-examples-3.1.1.7.2.2.2-1.jar'

# Data-set presets. SIZE is the label embedded in the HDFS output path and
# ROWS is the row count passed to teragen; every pair below works out to
# 100 bytes per row. To switch presets, uncomment one pair and comment out
# the active assignments.
# SIZE=1G ROWS=10000000
# SIZE=10G ROWS=100000000
# SIZE=100G ROWS=1000000000
# SIZE=500G ROWS=5000000000
ROWS='10000000000'
SIZE='1T'
# Local log directory, relative to the directory the script is started from.
LOGDIR=logs
# mkdir -p succeeds when the directory already exists, so no separate
# existence test is needed. Stop here if it cannot be created: the results
# file below would be unwritable, and nothing on the cluster should be
# touched before that is known.
if ! mkdir -p -- "./$LOGDIR"; then
  printf 'cannot create log directory: %s\n' "./$LOGDIR" >&2
  exit 1
fi
# Timestamp that makes each run's results file name unique.
DATE=$(date +%Y-%m-%d:%H:%M:%S)
RESULTSFILE="./$LOGDIR/teragen_results_$DATE"
# HDFS directory the generated data is written to; the name carries the
# SIZE label of the selected preset.
OUTPUT="/user/sunilemanjee/data/sandbox/poc/teragen/${SIZE}-terasort-input"
# Pre-run cleanup.
#
# Kill every job reported by `mapred job -list`: grep keeps the lines that
# contain "job_", and awk runs `mapred job -kill` on the first field of each.
# This is not limited to jobs started by this script.
mapred job -list \
  | grep 'job_' \
  | awk '{ system("mapred job -kill " $1) }'

# Recursively remove the output directory of any earlier run, bypassing the
# trash. With -f a missing path is not reported as an error.
hadoop fs -rm -r -f -skipTrash "$OUTPUT"
# Run teragen: write ROWS rows to the HDFS directory OUTPUT and append the
# job's stdout and stderr to RESULTSFILE.
#
# The -D settings are kept in an array so that each one is passed as exactly
# one argument and the list can be commented group by group.
teragen_opts=(
  # Log levels for map tasks, reduce tasks and the application master.
  -Dmapreduce.map.log.level=INFO
  -Dmapreduce.reduce.log.level=INFO
  -Dyarn.app.mapreduce.am.log.level=INFO
  -Dio.file.buffer.size=131072
  # Map-side container resources, retry limit and output compression.
  -Dmapreduce.map.cpu.vcores=1
  -Dmapreduce.map.java.opts=-Xmx1536m
  -Dmapreduce.map.maxattempts=1
  -Dmapreduce.map.memory.mb=2048
  -Dmapreduce.map.output.compress=true
  -Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec
  # Reduce-side container resources and retry limit.
  -Dmapreduce.reduce.cpu.vcores=1
  -Dmapreduce.reduce.java.opts=-Xmx1536m
  -Dmapreduce.reduce.maxattempts=1
  -Dmapreduce.reduce.memory.mb=2048
  # Sort settings.
  -Dmapreduce.task.io.sort.factor=100
  -Dmapreduce.task.io.sort.mb=384
  # Application master heap and container size.
  -Dyarn.app.mapreduce.am.command.opts=-Xmx768m
  -Dyarn.app.mapreduce.am.resource.mb=1024
  # Requested number of map tasks. mapreduce.job.maps is the current name of
  # the deprecated mapred.map.tasks property.
  -Dmapreduce.job.maps=92
)

# NOTE(review): `time` is a bash keyword, so its timing report is written to
# the shell's own stderr and is NOT captured by the redirection below; only
# the hadoop command's output lands in RESULTSFILE.
time hadoop jar "$MR_EXAMPLES_JAR" teragen \
  "${teragen_opts[@]}" \
  "$ROWS" "$OUTPUT" >> "$RESULTSFILE" 2>&1
#-Dmapreduce.map.log.level=TRACE \
#-Dmapreduce.reduce.log.level=TRACE \
#-Dyarn.app.mapreduce.am.log.level=TRACE \