Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 9653ab4

Browse files
authored
Add microbenchmark for FC + add fusion (#20780)
+ utils scripts to run it
1 parent 75b3719 commit 9653ab4

File tree

3 files changed

+300
-0
lines changed

3 files changed

+300
-0
lines changed

benchmark/python/dnnl/fc_add.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import time
19+
import gc
20+
import sys
21+
import mxnet as mx
22+
from mxnet.gluon import nn
23+
from mxnet.contrib import quantization
24+
25+
# Benchmarked configurations, one per table row: (input shape, num_hidden).
sizes = [
    ((1, 224), 512),
    ((1, 224), 4096),
    ((16, 1024), 1024),
    ((32, 4096), 1024),
    ((32, 4096), 4096),
    ((512, 512), 4096),
]

# Number of timed iterations and of untimed warm-up iterations per configuration.
rounds = 1000
warmup = 10

# Output switches read from the command line; every flag is optional.
_cli_args = frozenset(sys.argv)
test_header = "--no_test_header" not in _cli_args
table_header = "--no_table_header" not in _cli_args
table_left_colums = "--no_size_column" not in _cli_args
dump_graph = "--dump_graph" in _cli_args
41+
42+
def dump_graph_fn(net, postfix):
    """Export `net` when graph dumping was requested on the command line.

    Does nothing unless the module-level `dump_graph` flag is set; otherwise
    calls `net.export` with the path prefix "/tmp/fc_add_" followed by
    `postfix`.
    """
    if not dump_graph:
        return
    export_prefix = "/tmp/fc_add_" + postfix
    net.export(export_prefix)
45+
46+
def operator_string(elemwise_add):
    """Return the label of the add operator selected by `elemwise_add`.

    A truthy value selects 'elemwise_add', a falsy one 'npi_add'.
    """
    if elemwise_add:
        return 'elemwise_add'
    return 'npi_add'
48+
49+
def print_header(header):
    """Print the title of a benchmark section and the table heading rows.

    The title is replaced by an empty string when `test_header` is off, the
    table heading is skipped when `table_header` is off, and the shape/hidden
    columns are left out when `table_left_colums` is off.
    """
    print("\n")
    title = header if test_header else ""
    print(title, "\n")
    if not table_header:
        return
    if table_left_colums:
        heading_rows = ("| Shape | Hidden | Mean [ms] |",
                        "|------------:|-------:|----------:|")
    else:
        heading_rows = (" Mean [ms] |",
                        "----------:|")
    for row in heading_rows:
        print(row)
59+
60+
def print_value(shape, hidden, mean):
    """Print one result row of the table.

    `shape` is indexed at positions 0 and 1, `hidden` is the hidden size and
    `mean` the mean time (printed with three decimals). Only the mean column
    is printed when `table_left_colums` is off.
    """
    if not table_left_colums:
        print(" {:9.3f} |".format(mean))
        return
    dim0, dim1 = shape[0], shape[1]
    print("| ({:4},{:4}) | {:6} | {:9.3f} |".format(dim0, dim1, hidden, mean))
65+
66+
67+
def measure(net, data0, data1, data2, shape, nhid):
    """Time `net(data0, data1, data2)` and print the mean time per call.

    The network is first called `warmup` times untimed, then `rounds` times
    timed; every call is followed by `wait_to_read()` on its output so the
    computation is finished before the next call starts. The mean is reported
    in milliseconds through `print_value(shape, nhid, ...)`.

    The garbage collector is switched off while measuring and restored to
    its previous state afterwards, even if the network call raises.
    """
    mx.nd.waitall()
    gc.collect()
    gc_was_enabled = gc.isenabled()
    gc.disable()
    try:
        for _ in range(warmup):
            o = net(data0, data1, data2)
            o.wait_to_read()
        # perf_counter is monotonic, so system clock adjustments cannot
        # distort the measured interval.
        start_time = time.perf_counter()
        for _ in range(rounds):
            o = net(data0, data1, data2)
            o.wait_to_read()
        run_time = time.perf_counter() - start_time
    finally:
        if gc_was_enabled:
            gc.enable()
    print_value(shape, nhid, 1000 * run_time / rounds)
80+
81+
82+
class FCWithSum(nn.HybridBlock):
    """Two Dense layers whose outputs are added together with a third input.

    `fc0` is built with `in_units=num_in`; `fc1` is built without `in_units`.
    Both have `num_hidden` units. `elemwise_add` selects how the additions
    are expressed: through `mx.nd.elemwise_add` when truthy, through the `+`
    operator on the inputs otherwise.
    """

    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
        super().__init__(**kwargs)
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)
        self.elemwise_add = elemwise_add

    def forward(self, data0, data1, data2):
        """Return fc1(data1) + (data2 + fc0(data0))."""
        fc0_out = self.fc0(data0)
        fc1_out = self.fc1(data1)
        if not self.elemwise_add:
            partial_sum = data2 + fc0_out
            return fc1_out + partial_sum
        # The legacy operator works on nd arrays, so convert on the way in
        # and back to np arrays on the way out.
        partial_sum = mx.nd.elemwise_add(
            data2.as_nd_ndarray(), fc0_out.as_nd_ndarray()).as_np_ndarray()
        total = mx.nd.elemwise_add(
            fc1_out.as_nd_ndarray(), partial_sum.as_nd_ndarray()).as_np_ndarray()
        return total
99+
100+
def benchmark_float(elemwise_add):
    """Benchmark the float FCWithSum network for every entry of `sizes`.

    For each configuration a fresh network is initialized, hybridized and
    optimized for the 'ONEDNN' backend, timed with `measure`, and then handed
    to `dump_graph_fn`.
    """
    op_name = operator_string(elemwise_add)
    print_header(op_name + ', float')
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        # data0 and data1 feed the Dense layers; data2 has the shape
        # (shape[0], nhid) and is the tensor added to the first Dense output.
        input_shapes = (shape, shape, (shape[0], nhid))
        inputs = [mx.np.random.uniform(size=s, low=-1.0, high=1.0)
                  for s in input_shapes]
        net.optimize_for(*inputs, backend='ONEDNN')
        measure(net, *inputs, shape, nhid)
        dump_graph_fn(net, op_name + '_float')
114+
115+
class CalibIter(mx.io.DataIter):
    """Data iterator that yields one fixed batch exactly once.

    `provide_data` is `[('data', data_shape)]` when `data_shape` is a tuple;
    otherwise `data_shape` itself is used as the `provide_data` value.
    """

    def __init__(self, batch, data_shape, batch_size):
        super().__init__(batch_size)
        self.label_shape = (batch_size,)
        self.data_shape = data_shape
        self.provide_data = (
            [('data', data_shape)] if isinstance(data_shape, tuple) else data_shape
        )
        self.provide_label = []
        self.batch = batch

    def __iter__(self):
        # Single-element iteration: the stored batch and nothing else.
        yield self.batch
128+
129+
def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    """Benchmark the quantized FCWithSum network for every entry of `sizes`.

    For each configuration a fresh network is initialized and hybridized,
    quantized with `quantization.quantize_net` (naive calibration on one
    batch built from the benchmark inputs), hybridized again, timed with
    `measure`, and then handed to `dump_graph_fn`.
    """
    op_name = operator_string(elemwise_add)
    print_header(''.join((op_name,
                          ', mode = ', quantize_mode,
                          ', granularity = ', quantize_granularity)))
    graph_postfix = '_'.join((op_name, str(quantize_mode), str(quantize_granularity)))
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        input_shapes = (shape, shape, (shape[0], nhid))
        data0, data1, data2 = [mx.np.random.uniform(size=s, low=-1.0, high=1.0)
                               for s in input_shapes]
        # The benchmark inputs double as the calibration data set.
        calib_data = mx.gluon.data.DataLoader(
            mx.gluon.data.ArrayDataset(data0, data1, data2), batch_size=1)
        quantize_options = dict(
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity,
        )
        net = quantization.quantize_net(net, **quantize_options)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
        dump_graph_fn(net, graph_postfix)
157+
158+
# Float benchmarks: once per add-operator flavour.
for elemwise_add in (True, False):
    benchmark_float(elemwise_add)

# Int8 benchmarks: every combination of quantize mode, granularity and
# add-operator flavour, with the operator flavour varying fastest.
_int8_configs = [
    (mode, granularity, use_elemwise_add)
    for mode in ('smart', 'full')
    for granularity in ('tensor-wise', 'channel-wise')
    for use_elemwise_add in (True, False)
]
for quantize_mode, quantize_granularity, elemwise_add in _int8_configs:
    benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)

benchmark/python/dnnl/run.sh

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# Script for running a python benchmark with the OMP parameters set properly for it
21+
22+
# Abort with a usage message when no positional parameters are left,
# i.e. when the python script to run was not given.
# Arguments: the (remaining) positional parameters of the script.
check_parametrs() {
    if [ "$#" -eq 0 ] ; then
        # Usage goes to stderr so it never ends up in redirected benchmark output.
        {
            echo "Please give python script to run as parameter."
            echo "Optionally you can give number of threads to use and python scripts parameters:"
            echo " $(basename "$0") [num_threads] python_script [python script parameters]"
        } >&2
        # A missing script is a usage error: report failure to the caller.
        exit 1
    fi
}
30+
31+
check_parametrs "$@"

# Number of physical cores = sockets * cores per socket, as reported by lscpu.
NUM_SOCKET=$(lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# An optional leading unsigned integer overrides the number of threads.
integer_reg='^[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if (($1 > NUM_CORES)); then
        echo >&2
        echo "WARNING: given number of threads = $1" \
             " is greater than number of physical cores = $NUM_CORES." >&2
        echo >&2
    fi
    NUM_CORES=$1
    shift
    check_parametrs "$@"
fi

# NUM_CORES places, starting at {0}, with stride 1.
CORES={0}:${NUM_CORES}:1

# Show the command on stderr, then run it. The variables are passed as
# per-command environment assignments and the arguments as "$@", so script
# arguments containing spaces or shell metacharacters reach python unchanged
# (eval of a flattened string would re-split and re-interpret them).
echo "OMP_NUM_THREADS=${NUM_CORES} OMP_PROC_BIND=TRUE OMP_PLACES=${CORES} python3 -u $*" >&2
OMP_NUM_THREADS=${NUM_CORES} OMP_PROC_BIND=TRUE OMP_PLACES=${CORES} python3 -u "$@"
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/bin/bash
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
20+
# Script for running a python benchmark over a range of OMP thread counts
21+
22+
23+
# Print the usage text to stderr and terminate with a failure status.
# Every caller reaches this on a usage error, so the exit code is non-zero.
help_and_exit() {
    {
        echo "Usage:"
        echo " $(basename "$0") [start_num_threads step_num_threads end_num_threads] python_script [python script parameters]"
        echo "Number of threads range parameters and python script parameters are optional."
    } >&2
    exit 1
}
29+
30+
if [ "$#" -eq 0 ] ; then
    help_and_exit
fi

# Number of physical cores = sockets * cores per socket, as reported by lscpu.
NUM_SOCKET=$(lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# Default range: every thread count from 1 up to the number of physical cores.
NT_START=1
NT_STEP=1
NT_END=$NUM_CORES

integer_reg='^[0-9]+$'
# At most one sign character is accepted for the step.
signed_integer_reg='^[+-]?[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if [[ $2 =~ $signed_integer_reg ]] && [[ $3 =~ $integer_reg ]]; then
        NT_START=$1
        NT_STEP=$2
        NT_END=$3
        shift 3
        if [ "$#" -eq 0 ] ; then
            help_and_exit
        fi
    else
        echo "Provide 3 numbers for threads range: start, step and the end."
        help_and_exit
    fi
fi

NT_SEQUENCE=$(seq "$NT_START" "$NT_STEP" "$NT_END")
if [ -z "$NT_SEQUENCE" ]; then
    echo "Given threads range produce empty sequence."
    help_and_exit
else
    echo "Start python script $1 for following number of threads:" >&2
    echo $NT_SEQUENCE >&2
fi

RUN_SCRIPT=$(dirname "$0")/run.sh

# Per-run results are collected in a private temporary directory that is
# removed on exit, instead of predictable file names directly under /tmp.
TMP_DIR=$(mktemp -d) || exit 1
trap 'rm -rf "$TMP_DIR"' EXIT

# Start from an empty list so a TMP_FILES value inherited from the
# environment cannot leak into the final paste.
TMP_FILES=()
for NT in $NT_SEQUENCE;
do
    TMP_FILE=${TMP_DIR}/_result_${NT}.txt
    echo >"${TMP_FILE}"
    if [[ $NT -eq $NT_START ]]; then
        # The first column keeps the size columns and the test headers.
        echo "NUM_THREADS = $NT" >>"${TMP_FILE}"
        "$RUN_SCRIPT" "$NT" "$@" >>"${TMP_FILE}"
    else
        # Later columns carry only the mean times so the tables line up.
        echo " $NT" >>"${TMP_FILE}"
        "$RUN_SCRIPT" "$NT" "$@" --no_size_column --no_test_header >>"${TMP_FILE}"
    fi
    TMP_FILES+=("${TMP_FILE}")
done

# Glue the per-thread-count columns together side by side.
paste -d "" "${TMP_FILES[@]}"

0 commit comments

Comments
 (0)