1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_quantize_node.h"
6 #include "desc_iterator.hpp"
10 #include <mkldnn_types.h>
11 #include <mkldnn_extension_utils.h>
12 #include <ie_memcpy.h>
13 #include "details/caseless.hpp"
15 using namespace mkldnn;
16 using namespace MKLDNNPlugin;
17 using namespace InferenceEngine;
18 using namespace InferenceEngine::details;
// Constructor only forwards to the base MKLDNNNode; all validation and
// descriptor setup is deferred to getSupportedDescriptors().
20 MKLDNNQuantizeNode::MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
// Validates the Quantize layer's inputs, decides whether the output can be
// stored as packed binary (BIN) data, builds the mkldnn binarization
// primitive descriptor for that case, and caches per-channel input-low
// thresholds for binarization.
22 void MKLDNNQuantizeNode::getSupportedDescriptors() {
// Only FP32 activations are accepted at the data input (edge 0).
23 InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
24 if (precision != InferenceEngine::Precision::FP32)
25 THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only FP32 precision";
27 auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(getCnnLayer().get());
28 if (quantizeLayer == nullptr)
29 THROW_IE_EXCEPTION << "Cannot convert Quantize layer " << getName();
31 levels = quantizeLayer->levels;
// NOTE(review): the guard condition for this throw (presumably `levels <= 1`)
// is not visible in this excerpt — confirm against the full file. The message
// is also missing a space between getName() and "supports".
33 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only parameter levels > 1";
// Exactly five inputs are expected: data, input_low, input_high,
// output_low, output_high.
35 if (getParentEdges().size() != 5)
36 THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
37 if (getChildEdges().empty())
38 THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
// NOTE(review): this message (and the two below) also lacks a space after
// getName().
40 if (getParentEdgeAt(0)->getDims().ndims() != 4) {
41 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 4D input at edge 0";
// Range inputs (edges 1..4) may be per-tensor (1D) or per-channel (4D).
44 for (int i = 1; i < 5; i++) {
45 if (getParentEdgeAt(i)->getDims().ndims() != 1 && getParentEdgeAt(i)->getDims().ndims() != 4) {
46 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 1D or 4D inputs at edge " << i;
// Packed 1-bit output is only possible when the sole consumer is a
// BinaryConvolution node, which understands the BIN layout.
50 canStorePacked = getChildEdges().size() == 1 && getChildEdgeAt(0)->getChild()->getType() == BinaryConvolution;
// Build an mkldnn binarization primitive descriptor: FP32 nhwc input,
// per-channel FP32 thresholds ("weights"), BIN nhwc output.
53 mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
54 mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN);
55 mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
57 MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), idt, memory::nhwc);
58 MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), ddt, memory::nhwc);
// Threshold vector has one entry per input channel (dim 1 of the data edge).
60 InferenceEngine::SizeVector weightDims;
61 weightDims.push_back(getParentEdgeAt(0)->getDims()[1]);
62 MKLDNNDims blocked_weightDims(weightDims);
63 MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::x};
66 std::shared_ptr<mkldnn::binarization_forward::desc> bin_conv_desc;
67 bin_conv_desc.reset(new binarization_forward::desc(prop_kind::forward_scoring, algorithm::binarization_depthwise,
68 in_candidate, wgh_candidate, out_candidate));
70 descs.emplace_back(bin_conv_desc);
// Pre-read the input_low constant blob and expand it to one threshold per
// channel, broadcasting a scalar threshold when the blob has a single value.
72 InferenceEngine::SizeVector dims;
73 dims.push_back(getParentEdgeAt(0)->getDims()[1]);
// NOTE(review): InputLowBlob is dereferenced without a nullptr check — the
// dynamic_cast (or the "custom" blob lookup) could fail; confirm the producer
// is always a Const layer with an FP32 blob.
75 auto InputLowBlob = dynamic_cast<TBlob<float>*>(getParentEdgeAt(1)->getParent()->getCnnLayer()->blobs["custom"].get());
77 auto inputLowData = InputLowBlob->buffer().as<float*>();
// For a 4D range input the channel axis is 1; for 1D it is 0.
78 int inputLowAxis = getParentEdgeAt(1)->getDims().ndims() == 1 ? 0 : 1;
79 bool isInputLowBroadcasted = getParentEdgeAt(1)->getDims()[inputLowAxis] != dims[0];
81 for (int i = 0; i < dims[0]; i++) {
82 binarizationThresholds.push_back(inputLowData[isInputLowBroadcasted ? 0 : i]);
// Registers supported primitive descriptors: a reference nhwc implementation,
// plus (when the output can be packed to BIN) every implementation mkldnn's
// binarization primitive descriptor iterator offers.
87 void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
// Already initialized — nothing to do. NOTE(review): the early `return;`
// belonging to this guard is not visible in this excerpt.
88 if (!supportedPrimitiveDescriptors.empty())
// Inputs are always consumed as FP32; output is BIN only when the single
// consumer is a BinaryConvolution (canStorePacked), FP32 otherwise.
91 auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
92 auto outputDataType = canStorePacked ? MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN)
93 : MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
// Helper: build a PrimitiveDescInfo whose data input uses layout `fmt` and
// whose implementation type is `impl`.
97 auto same = [&] (memory::format fmt, impl_desc_type impl) -> PrimitiveDescInfo {
98 InferenceEngine::LayerConfig config;
99 config.dynBatchSupport = true;
100 for (size_t i = 0; i < getParentEdges().size(); i++) {
101 InferenceEngine::DataConfig dataConfig;
102 dataConfig.inPlace = -1;
103 dataConfig.constant = false;
// NOTE(review): the `if (i == 0) ... else ...` split between the next two
// desc assignments (data edge uses `fmt`; range edges use x/nchw by rank) is
// elided in this excerpt — confirm against the full file.
106 dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt);
108 dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType,
109 getParentEdgeAt(i)->getDims().ndims() == 1 ? memory::x : memory::nchw);
111 config.inConfs.push_back(dataConfig);
// Single output in layout `fmt` with the chosen output data type.
114 InferenceEngine::DataConfig dataConfig;
115 dataConfig.inPlace = -1;
116 dataConfig.constant = false;
117 dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt);
118 config.outConfs.push_back(dataConfig);
119 return {config, impl};
// Reference implementation is always available.
122 supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));
124 if (canStorePacked) {
// Enumerate mkldnn-provided binarization implementations (descs[0] was
// created in getSupportedDescriptors). NOTE(review): the opening `do {` of
// this do/while loop is not visible in this excerpt.
125 primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
127 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
128 supportedPrimitiveDescriptors.push_back(same(memory::nhwc, impl_type));
129 } while (itpd.next());
// Checks that input/output memory is allocated and, for the packed-BIN case,
// creates the mkldnn binarization primitive with the cached per-channel
// thresholds as its "weights". In the non-packed case no primitive is built
// and execute() falls back to the reference loop.
133 void MKLDNNQuantizeNode::createPrimitive() {
137 auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
138 auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
139 if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
140 THROW_IE_EXCEPTION << "Destination memory isn't allocated.";
141 if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
142 THROW_IE_EXCEPTION << "Input memory isn't allocated.";
143 if (getSelectedPrimitiveDescriptor() == nullptr)
144 THROW_IE_EXCEPTION << "Preferable primitive descriptor isn't set.";
146 if (canStorePacked) {
147 auto prim_desc = createPrimitiveDescriptor<binarization_forward::primitive_desc, binarization_forward::desc>();
// Wrap binarizationThresholds (filled in getSupportedDescriptors) in an
// mkldnn memory of shape [C] so the primitive can read the thresholds.
// The vector must stay alive and unmoved for the primitive's lifetime.
149 MKLDNNMemoryDesc binarizationDataDesc = {{getParentEdgeAt(0)->getDims()[1]}, memory::f32, memory::x};
150 auto binarizationDataMem = std::make_shared<MKLDNNMemory>(getEngine());
151 binarizationDataMem->Create(binarizationDataDesc, &binarizationThresholds[0]);
152 internalBlobMemory.push_back(binarizationDataMem);
154 prim.reset(new binarization_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
155 internalBlobMemory[0]->GetPrimitive(),
156 getChildEdgeAt(0)->getMemory().GetPrimitive()));
// Executes the node. When an mkldnn primitive was built (packed-BIN path) the
// base-class execute runs it; otherwise this reference loop applies the
// FakeQuantize formula element-wise over an NHWC FP32 tensor.
160 void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
// NOTE(review): the guard selecting between the mkldnn primitive path and the
// reference loop below (presumably `if (prim) { ... return; }`) is elided in
// this excerpt — confirm against the full file.
162 MKLDNNNode::execute(strm);
164 auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
165 auto &inputLowMemory = getParentEdgeAt(1)->getMemoryPtr();
166 auto &inputHighMemory = getParentEdgeAt(2)->getMemoryPtr();
167 auto &outputLowMemory = getParentEdgeAt(3)->getMemoryPtr();
168 auto &outputHighMemory = getParentEdgeAt(4)->getMemoryPtr();
169 auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
171 auto srcData = reinterpret_cast<const float *>(srcMemory->GetData());
172 auto inputLowData = reinterpret_cast<const float *>(inputLowMemory->GetData());
173 auto inputHighData = reinterpret_cast<const float *>(inputHighMemory->GetData());
174 auto outputLowData = reinterpret_cast<const float *>(outputLowMemory->GetData());
175 auto outputHighData = reinterpret_cast<const float *>(outputHighMemory->GetData());
176 auto dstData = reinterpret_cast<float *>(dstMemory->GetData());
// Skip any padding offset mkldnn placed before the first valid element.
178 srcData += srcMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
179 inputLowData += inputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
180 inputHighData += inputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
181 outputLowData += outputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
182 outputHighData += outputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
183 dstData += dstMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
// batchToProcess() honors dynamic-batch: may process fewer than N full dims.
185 size_t N = static_cast<size_t>(batchToProcess());
186 size_t C = static_cast<size_t>(srcMemory->GetDims()[1]);
187 size_t H = static_cast<size_t>(srcMemory->GetDims()[2]);
188 size_t W = static_cast<size_t>(srcMemory->GetDims()[3]);
// Each range tensor is either per-channel (size C on its channel axis) or a
// broadcast scalar; detect which by comparing its channel dim to C.
190 int inputLowAxis = inputLowMemory->GetDims().size() == 1 ? 0 : 1;
191 bool isInputLowBroadcasted = inputLowMemory->GetDims()[inputLowAxis] != C;
193 int inputHighAxis = inputHighMemory->GetDims().size() == 1 ? 0 : 1;
194 bool isInputHighBroadcasted = inputHighMemory->GetDims()[inputHighAxis] != C;
196 int outputLowAxis = outputLowMemory->GetDims().size() == 1 ? 0 : 1;
197 bool isOutputLowBroadcasted = outputLowMemory->GetDims()[outputLowAxis] != C;
199 int outputHighAxis = outputHighMemory->GetDims().size() == 1 ? 0 : 1;
200 bool isOutputHighBroadcasted = outputHighMemory->GetDims()[outputHighAxis] != C;
// Loop order matches the NHWC layout selected in the primitive descriptor:
// idx walks memory linearly with channels innermost.
202 for (int n = 0; n < N; n++) {
203 for (int h = 0; h < H; h++) {
204 for (int w = 0; w < W; w++) {
205 for (int c = 0; c < C; c++) {
206 size_t idx = n * H * W * C + h * W * C + w * C + c;
208 float inputLow = inputLowData[isInputLowBroadcasted ? 0 : c];
209 float inputHigh = inputHighData[isInputHighBroadcasted ? 0 : c];
210 float outputLow = outputLowData[isOutputLowBroadcasted ? 0 : c];
211 float outputHigh = outputHighData[isOutputHighBroadcasted ? 0 : c];
// FakeQuantize: clamp below inputLow / above inputHigh, otherwise snap to one
// of `levels` evenly spaced steps and rescale into [outputLow, outputHigh].
// NOTE(review): values exactly equal to inputHigh take the quantized branch
// (condition is `>`), while inputLow uses `<=` — asymmetric by design here.
213 if (srcData[idx] <= inputLow)
214 dstData[idx] = outputLow;
215 else if (srcData[idx] > inputHigh)
216 dstData[idx] = outputHigh;
218 dstData[idx] = roundf((srcData[idx] - inputLow) / (inputHigh - inputLow) * (levels - 1)) /
219 (levels - 1) * (outputHigh - outputLow) + outputLow;
// Node-factory hook: true iff this node instance really is a Quantize node.
227 bool MKLDNNQuantizeNode::created() const {
228 return getType() == Quantize;