1 /*******************************************************************************
2 * Copyright 2018 Intel Corporation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *******************************************************************************/
22 using namespace mkldnn;
/* Builds and eagerly executes a single int8-quantized convolution (AlexNet
 * conv3 shape) with a fused ReLU post-op, using the MKL-DNN v0.x C++ API:
 *   f32 user data --reorder/quantize--> u8/s8 --conv+relu--> u8 --reorder--> f32.
 *
 * NOTE(excerpt): this view of the function is truncated -- several statements
 * are missing between the visible lines (e.g. the declarations of `batch`,
 * `post_ops ops`, `src_reorder_pd`, `bias_reorder_pd`, `dst_reorder_pd`, the
 * try/catch around the capability probe, and the user-buffer handles passed
 * to the memory() constructors).  Comments below describe only what the
 * visible code shows. */
24 void simple_net_int8() {
/* CPU engine, device index 0. */
25 auto cpu_engine = engine(engine::cpu, 0);
27 /* Create a vector to store the topology primitives */
28 std::vector<primitive> net;
/* AlexNet conv3 problem size (`batch` is declared on an omitted line): */
33 * {batch, 256, 13, 13} (x) {384, 256, 3, 3}; -> {batch, 384, 13, 13}
36 memory::dims conv_src_tz = { batch, 256, 13, 13 };
37 memory::dims conv_weights_tz = { 384, 256, 3, 3 };
38 memory::dims conv_bias_tz = { 384 };
39 memory::dims conv_dst_tz = { batch, 384, 13, 13 };
40 memory::dims conv_strides = { 1, 1 };
41 memory::dims conv_padding = { 1, 1 };
43 /* Set Scaling mode for int8 quantizing */
/* Per-tensor (single-element) scales for src/weights/bias/dst reorders. */
44 const std::vector<float> src_scales = { 1.8f };
45 const std::vector<float> weight_scales = { 2.0f };
46 const std::vector<float> bias_scales = { 1.0f };
47 const std::vector<float> dst_scales = { 0.55f };
48 /* assign halves of vector with arbitrary values */
/* Per-output-channel scales for the convolution itself (384 channels). */
49 std::vector<float> conv_scales(384);
50 const int scales_half = 384 / 2;
51 std::fill(conv_scales.begin(), conv_scales.begin() + scales_half, 0.3f);
/* NOTE(review): the second fill starts at scales_half + 1, so
 * conv_scales[scales_half] is never assigned and keeps its value-initialized
 * 0.f, which zeroes that output channel after scaling.  Almost certainly the
 * intent was `conv_scales.begin() + scales_half` -- confirm against upstream. */
52 std::fill(conv_scales.begin() + scales_half + 1, conv_scales.end(), 0.8f);
/* Scale masks: 0 = one common scale for the whole tensor; for the conv,
 * mask bit 1 selects per-dim-1 (output channel) scaling, matching the
 * 384-element conv_scales vector. */
54 const int src_mask = 0;
55 const int weight_mask = 0;
56 const int bias_mask = 0;
57 const int dst_mask = 0;
58 const int conv_mask = 2; // 1 << output_channel_dim
60 /* Allocate input and output buffers for user data */
61 std::vector<float> user_src(batch * 256 * 13 * 13);
62 std::vector<float> user_dst(batch * 384 * 13 * 13);
64 /* Allocate and fill buffers for weights and bias */
/* Element counts are the products of the corresponding dims. */
65 std::vector<float> conv_weights(std::accumulate(conv_weights_tz.begin(),
66 conv_weights_tz.end(), 1, std::multiplies<uint32_t>()));
67 std::vector<float> conv_bias(std::accumulate(conv_bias_tz.begin(),
68 conv_bias_tz.end(), 1, std::multiplies<uint32_t>()));
70 /* create memory for user data */
/* f32 memories in fixed user formats (nchw / oihw / x); the data-handle
 * arguments to these constructors fall on omitted lines. */
71 auto user_src_memory = memory(
72 { { { conv_src_tz }, memory::data_type::f32, memory::format::nchw },
75 auto user_weights_memory
76 = memory({ { { conv_weights_tz }, memory::data_type::f32,
77 memory::format::oihw },
80 auto user_bias_memory = memory(
81 { { { conv_bias_tz }, memory::data_type::f32, memory::format::x },
85 /* create memory descriptors for convolution data w/ no specified format */
/* int8 descriptors with format::any so the primitive picks its preferred
 * layout: u8 activations, s8 weights/bias. */
86 auto conv_src_md = memory::desc(
87 { conv_src_tz }, memory::data_type::u8, memory::format::any);
88 auto conv_bias_md = memory::desc(
89 { conv_bias_tz }, memory::data_type::s8, memory::format::any);
90 auto conv_weights_md = memory::desc(
91 { conv_weights_tz }, memory::data_type::s8, memory::format::any);
92 auto conv_dst_md = memory::desc(
93 { conv_dst_tz }, memory::data_type::u8, memory::format::any);
95 /* create a convolution */
96 auto conv_desc = convolution_forward::desc(prop_kind::forward,
97 convolution_direct, conv_src_md, conv_weights_md, conv_bias_md,
98 conv_dst_md, conv_strides, conv_padding, conv_padding,
101 /* define the convolution attributes */
/* Attributes carry the int8 rounding mode and per-channel output scales. */
102 primitive_attr conv_attr;
103 conv_attr.set_int_output_round_mode(round_mode::round_nearest);
104 conv_attr.set_output_scales(conv_mask, conv_scales);
106 /* AlexNet: execute ReLU as PostOps */
107 const float ops_scale = 1.f;
108 const float ops_alpha = 0.f; // relu negative slope
109 const float ops_beta = 0.f;
/* `ops` (a post_ops object) is declared on an omitted line. */
111 ops.append_eltwise(ops_scale, algorithm::eltwise_relu, ops_alpha, ops_beta);
112 conv_attr.set_post_ops(ops);
114 /* check if int8 convolution is supported */
/* Probe primitive_desc creation; the surrounding try/catch (which binds `e`
 * below) falls on omitted lines. */
116 auto conv_prim_desc = convolution_forward::primitive_desc(
117 conv_desc, conv_attr, cpu_engine);
119 if (e.status == mkldnn_unimplemented) {
120 std::cerr << "AVX512-BW support or Intel(R) MKL dependency is "
121 "required for int8 convolution" << std::endl;
/* Re-create the primitive_desc for actual use after the probe succeeded. */
126 auto conv_prim_desc = convolution_forward::primitive_desc(
127 conv_desc, conv_attr, cpu_engine);
129 /* Next: create memory primitives for the convolution's input data
130 * and use reorder to quantize the values into int8 */
/* src: f32 nchw -> u8 in the primitive's preferred layout, scaled by
 * src_scales.  `src_reorder_pd`'s declaration head is on an omitted line. */
131 auto conv_src_memory = memory(conv_prim_desc.src_primitive_desc());
132 primitive_attr src_attr;
133 src_attr.set_int_output_round_mode(round_mode::round_nearest);
134 src_attr.set_output_scales(src_mask, src_scales);
136 = reorder::primitive_desc(user_src_memory.get_primitive_desc(),
137 conv_src_memory.get_primitive_desc(), src_attr);
138 net.push_back(reorder(src_reorder_pd, user_src_memory, conv_src_memory));
/* weights: f32 oihw -> s8, scaled by weight_scales. */
140 auto conv_weights_memory = memory(conv_prim_desc.weights_primitive_desc());
141 primitive_attr weight_attr;
142 weight_attr.set_int_output_round_mode(round_mode::round_nearest);
143 weight_attr.set_output_scales(weight_mask, weight_scales);
144 auto weight_reorder_pd
145 = reorder::primitive_desc(user_weights_memory.get_primitive_desc(),
146 conv_weights_memory.get_primitive_desc(), weight_attr);
147 net.push_back(reorder(
148 weight_reorder_pd, user_weights_memory, conv_weights_memory));
/* bias: f32 -> s8, scaled by bias_scales (1.0f, i.e. value-preserving
 * up to rounding).  `bias_reorder_pd`'s declaration head is omitted. */
150 auto conv_bias_memory = memory(conv_prim_desc.bias_primitive_desc());
151 primitive_attr bias_attr;
152 bias_attr.set_int_output_round_mode(round_mode::round_nearest);
153 bias_attr.set_output_scales(bias_mask, bias_scales);
155 = reorder::primitive_desc(user_bias_memory.get_primitive_desc(),
156 conv_bias_memory.get_primitive_desc(), bias_attr);
157 net.push_back(reorder(bias_reorder_pd, user_bias_memory, conv_bias_memory));
/* u8 destination in the primitive's preferred layout. */
159 auto conv_dst_memory = memory(conv_prim_desc.dst_primitive_desc());
161 /* create convolution primitive and add it to net */
162 net.push_back(convolution_forward(conv_prim_desc, conv_src_memory,
163 conv_weights_memory, conv_bias_memory, conv_dst_memory));
165 /* Convert data back into fp32 and compare values with u8.
166 * Note: data is unsigned since there are no negative values
169 /* Create a memory primitive for user data output */
/* The user_dst.data() handle argument falls on an omitted line. */
170 auto user_dst_memory = memory(
171 { { { conv_dst_tz }, memory::data_type::f32, memory::format::nchw },
/* dst: u8 -> f32 nchw, scaled by dst_scales.  `dst_reorder_pd`'s
 * declaration head is on an omitted line. */
175 primitive_attr dst_attr;
176 dst_attr.set_int_output_round_mode(round_mode::round_nearest);
177 dst_attr.set_output_scales(dst_mask, dst_scales);
179 = reorder::primitive_desc(conv_dst_memory.get_primitive_desc(),
180 user_dst_memory.get_primitive_desc(), dst_attr);
182 /* Convert the destination memory from convolution into user
183 * data format if necessary */
184 if (conv_dst_memory != user_dst_memory) {
186 reorder(dst_reorder_pd, conv_dst_memory, user_dst_memory));
/* Execute the whole pipeline synchronously on an eager stream. */
189 stream(stream::kind::eager).submit(net).wait();
193 int main(int argc, char **argv) {
196 * On convolution creating: check for Intel(R) MKL dependency execution.
197 * output: warning if not found. */
199 std::cout << "Simple-net-int8 example passed!" << std::endl;
201 std::cerr << "status: " << e.status << std::endl;
202 std::cerr << "message: " << e.message << std::endl;