|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | + |
| 18 | +import time |
| 19 | +import gc |
| 20 | +import sys |
| 21 | +import mxnet as mx |
| 22 | +from mxnet.gluon import nn |
| 23 | +from mxnet.contrib import quantization |
| 24 | + |
# Benchmark configurations: (input shape, num_hidden) pairs for the Dense layers.
sizes = [
    (( 1, 224), 512),
    (( 1, 224), 4096),
    (( 16, 1024), 1024),
    (( 32, 4096), 1024),
    (( 32, 4096), 4096),
    ((512, 512), 4096)]

# Number of timed iterations per measurement and untimed warm-up iterations.
rounds = 1000
warmup = 10

# Simple command-line switches controlling output format and graph dumping.
test_header = "--no_test_header" not in sys.argv          # print per-test section headers
table_header = "--no_table_header" not in sys.argv        # print markdown table headers
table_left_colums = "--no_size_column" not in sys.argv    # include Shape/Hidden columns
dump_graph = "--dump_graph" in sys.argv                   # export optimized graphs to /tmp

def dump_graph_fn(net, postfix):
    """Export the network's symbol/params to /tmp when --dump_graph is set."""
    if not dump_graph:
        return
    net.export("/tmp/fc_add_" + postfix)

def operator_string(elemwise_add):
    """Name of the addition operator being benchmarked."""
    if elemwise_add:
        return 'elemwise_add'
    return 'npi_add'

def print_header(header):
    """Print a benchmark section header and, optionally, a markdown table header."""
    print("\n")
    print(header if test_header else "", "\n")
    if not table_header:
        return
    if table_left_colums:
        print("| Shape | Hidden | Mean [ms] |" )
        print("|------------:|-------:|----------:|" )
    else:
        print(" Mean [ms] |" )
        print("----------:|" )

def print_value(shape, hidden, mean):
    """Print one markdown table row for a single measurement (mean in ms)."""
    if table_left_colums:
        row = "| ({:4},{:4}) | {:6} | {:9.3f} |".format(shape[0], shape[1], hidden, mean)
    else:
        row = " {:9.3f} |".format(mean)
    print(row)

def measure(net, data0, data1, data2, shape, nhid):
    """Time `rounds` forward passes of `net` after `warmup` untimed iterations.

    Prints the mean per-iteration time in milliseconds via print_value.
    GC is disabled during the loop so collection pauses do not skew timings.
    """
    mx.nd.waitall()
    gc.collect()
    gc.disable()
    try:
        for i in range(rounds + warmup):
            if i == warmup:
                start_time = time.time()
            o = net(data0, data1, data2)
            # Block until the async forward pass finishes so we time real work.
            o.wait_to_read()
        end_time = time.time()
        run_time = (end_time - start_time)
        print_value(shape, nhid, 1000 * run_time / rounds)
    finally:
        # Bug fix: re-enable GC even if a forward pass raises; previously an
        # exception here left GC disabled for the remainder of the process.
        gc.enable()

class FCWithSum(nn.HybridBlock):
    """Two Dense layers combined with a residual-style input via two additions.

    Computes fc1(data1) + (data2 + fc0(data0)).  With ``elemwise_add=True`` the
    additions are forced through the legacy ``elemwise_add`` operator; otherwise
    the numpy-style ``+`` is used (named ``npi_add`` by ``operator_string``).
    The exact operator choice matters: it is the pattern the ONEDNN backend is
    expected to fuse, which is what this benchmark measures.
    """
    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
        super(FCWithSum, self).__init__(**kwargs)
        # fc0 has a fixed input size; fc1 infers its input size at first forward.
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)
        self.elemwise_add = elemwise_add

    def forward(self, data0, data1, data2):
        _fc0 = self.fc0(data0)
        _fc1 = self.fc1(data1)
        if self.elemwise_add:
            # Round-trip through the legacy ndarray API so the graph contains
            # elemwise_add nodes rather than npi_add.
            _sum0 = mx.nd.elemwise_add(data2.as_nd_ndarray(), _fc0.as_nd_ndarray()).as_np_ndarray()
            _sum1 = mx.nd.elemwise_add(_fc1.as_nd_ndarray(), _sum0.as_nd_ndarray()).as_np_ndarray()
        else:
            _sum0 = data2 + _fc0
            _sum1 = _fc1 + _sum0
        return _sum1

def benchmark_float(elemwise_add):
    """Benchmark the float (non-quantized) FC+add network over all sizes."""
    print_header(operator_string(elemwise_add) + ', float')
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        # Three random inputs: two of `shape` for the Dense layers, one of
        # (batch, nhid) for the residual addition.
        data0, data1, data2 = (mx.np.random.uniform(size=s, low=-1.0, high=1.0)
                               for s in (shape, shape, (shape[0], nhid)))
        net.optimize_for(data0, data1, data2, backend='ONEDNN')
        measure(net, data0, data1, data2, shape, nhid)
        dump_graph_fn(net, operator_string(elemwise_add) + '_float')

class CalibIter(mx.io.DataIter):
    """Minimal DataIter that yields a single pre-built batch (for calibration).

    NOTE(review): this class is not referenced by the benchmarks below, which
    calibrate via a gluon DataLoader instead — appears to be leftover scaffolding.
    """
    def __init__(self, batch, data_shape, batch_size):
        super(CalibIter, self).__init__(batch_size)
        self.label_shape = (batch_size,)
        self.data_shape = data_shape
        # A bare tuple describes one 'data' input; otherwise data_shape is
        # assumed to already be a provide_data-style list.
        self.provide_data = ([('data', data_shape)]
                             if isinstance(data_shape, tuple) else data_shape)
        self.provide_label = []
        self.batch = batch

    def __iter__(self):
        yield self.batch

def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    """Quantize the FC+add network to int8 (naive calibration) and benchmark it."""
    header = operator_string(elemwise_add) + ', mode = ' + quantize_mode + \
        ', granularity = ' + quantize_granularity
    print_header(header)
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data2 = mx.np.random.uniform(size=(shape[0], nhid), low=-1.0, high=1.0)
        # One-batch loader used only for naive calibration.
        dataset = mx.gluon.data.ArrayDataset(data0, data1, data2)
        loader = mx.gluon.data.DataLoader(dataset, batch_size=1)
        net = quantization.quantize_net(
            net,
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            calib_mode='naive',
            calib_data=loader,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
        dump_graph_fn(net, operator_string(elemwise_add) + '_'
                      + str(quantize_mode) + '_' + str(quantize_granularity))

def main():
    """Run the float benchmarks, then the int8 benchmarks for every
    quantize mode / granularity / operator combination."""
    for elemwise_add in [True, False]:
        benchmark_float(elemwise_add)

    for quantize_mode in ['smart', 'full']:
        for quantize_granularity in ['tensor-wise', 'channel-wise']:
            for elemwise_add in [True, False]:
                benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)


# Idiom fix: guard the driver so importing this module (e.g. from a test or
# another benchmark harness) does not trigger a multi-minute benchmark run.
if __name__ == "__main__":
    main()