1 /*******************************************************************************
2 * Copyright 2018 Intel Corporation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
22 using namespace mkldnn;
/* Builds and eagerly executes a single int8-quantized convolution (AlexNet
 * conv3 shape) with a fused ReLU post-op, using the MKL-DNN v0.x C++ API:
 *   f32 user data --reorder/quantize--> u8/s8 --conv+relu--> u8 --reorder--> f32.
 *
 * NOTE(excerpt): this view of the function is truncated -- several statements
 * are missing between the visible lines (e.g. the declarations of `batch`,
 * `post_ops ops`, `src_reorder_pd`, `bias_reorder_pd`, `dst_reorder_pd`, the
 * try/catch around the capability probe, and the user-buffer handles passed
 * to the memory() constructors).  Comments below describe only what the
 * visible code shows. */
24 void simple_net_int8() {
/* CPU engine, device index 0. */
25 auto cpu_engine = engine(engine::cpu, 0);
27 /* Create a vector to store the topology primitives */
28 std::vector<primitive> net;
/* AlexNet conv3 problem size (`batch` is declared on an omitted line): */
33 * {batch, 256, 13, 13} (x) {384, 256, 3, 3}; -> {batch, 384, 13, 13}
36 memory::dims conv_src_tz = { batch, 256, 13, 13 };
37 memory::dims conv_weights_tz = { 384, 256, 3, 3 };
38 memory::dims conv_bias_tz = { 384 };
39 memory::dims conv_dst_tz = { batch, 384, 13, 13 };
40 memory::dims conv_strides = { 1, 1 };
41 memory::dims conv_padding = { 1, 1 };
43 /* Set Scaling mode for int8 quantizing */
/* Per-tensor (single-element) scales for src/weights/bias/dst reorders. */
44 const std::vector<float> src_scales = { 1.8f };
45 const std::vector<float> weight_scales = { 2.0f };
46 const std::vector<float> bias_scales = { 1.0f };
47 const std::vector<float> dst_scales = { 0.55f };
48 /* assign halves of vector with arbitrary values */
/* Per-output-channel scales for the convolution itself (384 channels). */
49 std::vector<float> conv_scales(384);
50 const int scales_half = 384 / 2;
51 std::fill(conv_scales.begin(), conv_scales.begin() + scales_half, 0.3f);
/* NOTE(review): the second fill starts at scales_half + 1, so
 * conv_scales[scales_half] is never assigned and keeps its value-initialized
 * 0.f, which zeroes that output channel after scaling.  Almost certainly the
 * intent was `conv_scales.begin() + scales_half` -- confirm against upstream. */
52 std::fill(conv_scales.begin() + scales_half + 1, conv_scales.end(), 0.8f);
/* Scale masks: 0 = one common scale for the whole tensor; for the conv,
 * mask bit 1 selects per-dim-1 (output channel) scaling, matching the
 * 384-element conv_scales vector. */
54 const int src_mask = 0;
55 const int weight_mask = 0;
56 const int bias_mask = 0;
57 const int dst_mask = 0;
58 const int conv_mask = 2; // 1 << output_channel_dim
60 /* Allocate input and output buffers for user data */
61 std::vector<float> user_src(batch * 256 * 13 * 13);
62 std::vector<float> user_dst(batch * 384 * 13 * 13);
64 /* Allocate and fill buffers for weights and bias */
/* Element counts are the products of the corresponding dims. */
65 std::vector<float> conv_weights(std::accumulate(conv_weights_tz.begin(),
66 conv_weights_tz.end(), 1, std::multiplies<uint32_t>()));
67 std::vector<float> conv_bias(std::accumulate(conv_bias_tz.begin(),
68 conv_bias_tz.end(), 1, std::multiplies<uint32_t>()));
70 /* create memory for user data */
/* f32 memories in fixed user formats (nchw / oihw / x); the data-handle
 * arguments to these constructors fall on omitted lines. */
71 auto user_src_memory = memory(
72 { { { conv_src_tz }, memory::data_type::f32, memory::format::nchw },
75 auto user_weights_memory
76 = memory({ { { conv_weights_tz }, memory::data_type::f32,
77 memory::format::oihw },
80 auto user_bias_memory = memory(
81 { { { conv_bias_tz }, memory::data_type::f32, memory::format::x },
85 /* create memory descriptors for convolution data w/ no specified format */
/* int8 descriptors with format::any so the primitive picks its preferred
 * layout: u8 activations, s8 weights/bias. */
86 auto conv_src_md = memory::desc(
87 { conv_src_tz }, memory::data_type::u8, memory::format::any);
88 auto conv_bias_md = memory::desc(
89 { conv_bias_tz }, memory::data_type::s8, memory::format::any);
90 auto conv_weights_md = memory::desc(
91 { conv_weights_tz }, memory::data_type::s8, memory::format::any);
92 auto conv_dst_md = memory::desc(
93 { conv_dst_tz }, memory::data_type::u8, memory::format::any);
95 /* create a convolution */
96 auto conv_desc = convolution_forward::desc(prop_kind::forward,
97 convolution_direct, conv_src_md, conv_weights_md, conv_bias_md,
98 conv_dst_md, conv_strides, conv_padding, conv_padding,
101 /* define the convolution attributes */
/* Attributes carry the int8 rounding mode and per-channel output scales. */
102 primitive_attr conv_attr;
103 conv_attr.set_int_output_round_mode(round_mode::round_nearest);
104 conv_attr.set_output_scales(conv_mask, conv_scales);
106 /* AlexNet: execute ReLU as PostOps */
107 const float ops_scale = 1.f;
108 const float ops_alpha = 0.f; // relu negative slope
109 const float ops_beta = 0.f;
/* `ops` (a post_ops object) is declared on an omitted line. */
111 ops.append_eltwise(ops_scale, algorithm::eltwise_relu, ops_alpha, ops_beta);
112 conv_attr.set_post_ops(ops);
114 /* check if int8 convolution is supported */
/* Probe primitive_desc creation; the surrounding try/catch (which binds `e`
 * below) falls on omitted lines. */
116 auto conv_prim_desc = convolution_forward::primitive_desc(
117 conv_desc, conv_attr, cpu_engine);
119 if (e.status == mkldnn_unimplemented) {
120 std::cerr << "AVX512-BW support or Intel(R) MKL dependency is "
121 "required for int8 convolution" << std::endl;
/* Re-create the primitive_desc for actual use after the probe succeeded. */
126 auto conv_prim_desc = convolution_forward::primitive_desc(
127 conv_desc, conv_attr, cpu_engine);
129 /* Next: create memory primitives for the convolution's input data
130 * and use reorder to quantize the values into int8 */
/* src: f32 nchw -> u8 in the primitive's preferred layout, scaled by
 * src_scales.  `src_reorder_pd`'s declaration head is on an omitted line. */
131 auto conv_src_memory = memory(conv_prim_desc.src_primitive_desc());
132 primitive_attr src_attr;
133 src_attr.set_int_output_round_mode(round_mode::round_nearest);
134 src_attr.set_output_scales(src_mask, src_scales);
136 = reorder::primitive_desc(user_src_memory.get_primitive_desc(),
137 conv_src_memory.get_primitive_desc(), src_attr);
138 net.push_back(reorder(src_reorder_pd, user_src_memory, conv_src_memory));
/* weights: f32 oihw -> s8, scaled by weight_scales. */
140 auto conv_weights_memory = memory(conv_prim_desc.weights_primitive_desc());
141 primitive_attr weight_attr;
142 weight_attr.set_int_output_round_mode(round_mode::round_nearest);
143 weight_attr.set_output_scales(weight_mask, weight_scales);
144 auto weight_reorder_pd
145 = reorder::primitive_desc(user_weights_memory.get_primitive_desc(),
146 conv_weights_memory.get_primitive_desc(), weight_attr);
147 net.push_back(reorder(
148 weight_reorder_pd, user_weights_memory, conv_weights_memory));
/* bias: f32 -> s8, scaled by bias_scales (1.0f, i.e. value-preserving
 * up to rounding).  `bias_reorder_pd`'s declaration head is omitted. */
150 auto conv_bias_memory = memory(conv_prim_desc.bias_primitive_desc());
151 primitive_attr bias_attr;
152 bias_attr.set_int_output_round_mode(round_mode::round_nearest);
153 bias_attr.set_output_scales(bias_mask, bias_scales);
155 = reorder::primitive_desc(user_bias_memory.get_primitive_desc(),
156 conv_bias_memory.get_primitive_desc(), bias_attr);
157 net.push_back(reorder(bias_reorder_pd, user_bias_memory, conv_bias_memory));
/* u8 destination in the primitive's preferred layout. */
159 auto conv_dst_memory = memory(conv_prim_desc.dst_primitive_desc());
161 /* create convolution primitive and add it to net */
162 net.push_back(convolution_forward(conv_prim_desc, conv_src_memory,
163 conv_weights_memory, conv_bias_memory, conv_dst_memory));
165 /* Convert data back into fp32 and compare values with u8.
166 * Note: data is unsigned since there are no negative values
169 /* Create a memory primitive for user data output */
/* The user_dst.data() handle argument falls on an omitted line. */
170 auto user_dst_memory = memory(
171 { { { conv_dst_tz }, memory::data_type::f32, memory::format::nchw },
/* dst: u8 -> f32 nchw, scaled by dst_scales.  `dst_reorder_pd`'s
 * declaration head is on an omitted line. */
175 primitive_attr dst_attr;
176 dst_attr.set_int_output_round_mode(round_mode::round_nearest);
177 dst_attr.set_output_scales(dst_mask, dst_scales);
179 = reorder::primitive_desc(conv_dst_memory.get_primitive_desc(),
180 user_dst_memory.get_primitive_desc(), dst_attr);
182 /* Convert the destination memory from convolution into user
183 * data format if necessary */
184 if (conv_dst_memory != user_dst_memory) {
186 reorder(dst_reorder_pd, conv_dst_memory, user_dst_memory));
/* Execute the whole pipeline synchronously on an eager stream. */
189 stream(stream::kind::eager).submit(net).wait();
193 int main(int argc, char **argv) {
196 * On convolution creating: check for Intel(R) MKL dependency execution.
197 * output: warning if not found. */
199 std::cout << "Simple-net-int8 example passed!" << std::endl;
201 std::cerr << "status: " << e.status << std::endl;
202 std::cerr << "message: " << e.message << std::endl;