1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "mkldnn_quantize_node.h"
6 #include "desc_iterator.hpp"
10 #include <mkldnn_types.h>
11 #include <mkldnn_extension_utils.h>
12 #include <ie_memcpy.h>
13 #include "details/caseless.hpp"
15 using namespace mkldnn;
16 using namespace MKLDNNPlugin;
17 using namespace InferenceEngine;
18 using namespace InferenceEngine::details;
// Constructor only forwards to the base MKLDNNNode; all validation and
// descriptor setup is deferred to getSupportedDescriptors().
20 MKLDNNQuantizeNode::MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
// Validates the Quantize layer's inputs, decides whether the output can be
// stored as packed binary (BIN) data, builds the mkldnn binarization
// primitive descriptor for that case, and caches per-channel input-low
// thresholds for binarization.
22 void MKLDNNQuantizeNode::getSupportedDescriptors() {
// Only FP32 activations are accepted at the data input (edge 0).
23 InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
24 if (precision != InferenceEngine::Precision::FP32)
25 THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only FP32 precision";
27 auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(getCnnLayer().get());
28 if (quantizeLayer == nullptr)
29 THROW_IE_EXCEPTION << "Cannot convert Quantize layer " << getName();
31 levels = quantizeLayer->levels;
// NOTE(review): the guard condition for this throw (presumably `levels <= 1`)
// is not visible in this excerpt — confirm against the full file. The message
// is also missing a space between getName() and "supports".
33 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only parameter levels > 1";
// Exactly five inputs are expected: data, input_low, input_high,
// output_low, output_high.
35 if (getParentEdges().size() != 5)
36 THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
37 if (getChildEdges().empty())
38 THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
// NOTE(review): this message (and the two below) also lacks a space after
// getName().
40 if (getParentEdgeAt(0)->getDims().ndims() != 4) {
41 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 4D input at edge 0";
// Range inputs (edges 1..4) may be per-tensor (1D) or per-channel (4D).
44 for (int i = 1; i < 5; i++) {
45 if (getParentEdgeAt(i)->getDims().ndims() != 1 && getParentEdgeAt(i)->getDims().ndims() != 4) {
46 THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 1D or 4D inputs at edge " << i;
// Packed 1-bit output is only possible when the sole consumer is a
// BinaryConvolution node, which understands the BIN layout.
50 canStorePacked = getChildEdges().size() == 1 && getChildEdgeAt(0)->getChild()->getType() == BinaryConvolution;
// Build an mkldnn binarization primitive descriptor: FP32 nhwc input,
// per-channel FP32 thresholds ("weights"), BIN nhwc output.
53 mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
54 mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN);
55 mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
57 MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), idt, memory::nhwc);
58 MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), ddt, memory::nhwc);
// Threshold vector has one entry per input channel (dim 1 of the data edge).
60 InferenceEngine::SizeVector weightDims;
61 weightDims.push_back(getParentEdgeAt(0)->getDims()[1]);
62 MKLDNNDims blocked_weightDims(weightDims);
63 MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::x};
66 std::shared_ptr<mkldnn::binarization_forward::desc> bin_conv_desc;
67 bin_conv_desc.reset(new binarization_forward::desc(prop_kind::forward_scoring, algorithm::binarization_depthwise,
68 in_candidate, wgh_candidate, out_candidate));
70 descs.emplace_back(bin_conv_desc);
// Pre-read the input_low constant blob and expand it to one threshold per
// channel, broadcasting a scalar threshold when the blob has a single value.
72 InferenceEngine::SizeVector dims;
73 dims.push_back(getParentEdgeAt(0)->getDims()[1]);
// NOTE(review): InputLowBlob is dereferenced without a nullptr check — the
// dynamic_cast (or the "custom" blob lookup) could fail; confirm the producer
// is always a Const layer with an FP32 blob.
75 auto InputLowBlob = dynamic_cast<TBlob<float>*>(getParentEdgeAt(1)->getParent()->getCnnLayer()->blobs["custom"].get());
77 auto inputLowData = InputLowBlob->buffer().as<float*>();
// For a 4D range input the channel axis is 1; for 1D it is 0.
78 int inputLowAxis = getParentEdgeAt(1)->getDims().ndims() == 1 ? 0 : 1;
79 bool isInputLowBroadcasted = getParentEdgeAt(1)->getDims()[inputLowAxis] != dims[0];
81 for (int i = 0; i < dims[0]; i++) {
82 binarizationThresholds.push_back(inputLowData[isInputLowBroadcasted ? 0 : i]);
// Registers supported primitive descriptors: a reference nhwc implementation,
// plus (when the output can be packed to BIN) every implementation mkldnn's
// binarization primitive descriptor iterator offers.
87 void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
// Already initialized — nothing to do. NOTE(review): the early `return;`
// belonging to this guard is not visible in this excerpt.
88 if (!supportedPrimitiveDescriptors.empty())
// Inputs are always consumed as FP32; output is BIN only when the single
// consumer is a BinaryConvolution (canStorePacked), FP32 otherwise.
91 auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
92 auto outputDataType = canStorePacked ? MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN)
93 : MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
// Helper: build a PrimitiveDescInfo whose data input uses layout `fmt` and
// whose implementation type is `impl`.
97 auto same = [&] (memory::format fmt, impl_desc_type impl) -> PrimitiveDescInfo {
98 InferenceEngine::LayerConfig config;
99 config.dynBatchSupport = true;
100 for (size_t i = 0; i < getParentEdges().size(); i++) {
101 InferenceEngine::DataConfig dataConfig;
102 dataConfig.inPlace = -1;
103 dataConfig.constant = false;
// NOTE(review): the `if (i == 0) ... else ...` split between the next two
// desc assignments (data edge uses `fmt`; range edges use x/nchw by rank) is
// elided in this excerpt — confirm against the full file.
106 dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt);
108 dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType,
109 getParentEdgeAt(i)->getDims().ndims() == 1 ? memory::x : memory::nchw);
111 config.inConfs.push_back(dataConfig);
// Single output in layout `fmt` with the chosen output data type.
114 InferenceEngine::DataConfig dataConfig;
115 dataConfig.inPlace = -1;
116 dataConfig.constant = false;
117 dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt);
118 config.outConfs.push_back(dataConfig);
119 return {config, impl};
// Reference implementation is always available.
122 supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));
124 if (canStorePacked) {
// Enumerate mkldnn-provided binarization implementations (descs[0] was
// created in getSupportedDescriptors). NOTE(review): the opening `do {` of
// this do/while loop is not visible in this excerpt.
125 primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
127 impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
128 supportedPrimitiveDescriptors.push_back(same(memory::nhwc, impl_type));
129 } while (itpd.next());
// Checks that input/output memory is allocated and, for the packed-BIN case,
// creates the mkldnn binarization primitive with the cached per-channel
// thresholds as its "weights". In the non-packed case no primitive is built
// and execute() falls back to the reference loop.
133 void MKLDNNQuantizeNode::createPrimitive() {
137 auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
138 auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
139 if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
140 THROW_IE_EXCEPTION << "Destination memory isn't allocated.";
141 if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
142 THROW_IE_EXCEPTION << "Input memory isn't allocated.";
143 if (getSelectedPrimitiveDescriptor() == nullptr)
144 THROW_IE_EXCEPTION << "Preferable primitive descriptor isn't set.";
146 if (canStorePacked) {
147 auto prim_desc = createPrimitiveDescriptor<binarization_forward::primitive_desc, binarization_forward::desc>();
// Wrap binarizationThresholds (filled in getSupportedDescriptors) in an
// mkldnn memory of shape [C] so the primitive can read the thresholds.
// The vector must stay alive and unmoved for the primitive's lifetime.
149 MKLDNNMemoryDesc binarizationDataDesc = {{getParentEdgeAt(0)->getDims()[1]}, memory::f32, memory::x};
150 auto binarizationDataMem = std::make_shared<MKLDNNMemory>(getEngine());
151 binarizationDataMem->Create(binarizationDataDesc, &binarizationThresholds[0]);
152 internalBlobMemory.push_back(binarizationDataMem);
154 prim.reset(new binarization_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
155 internalBlobMemory[0]->GetPrimitive(),
156 getChildEdgeAt(0)->getMemory().GetPrimitive()));
// Executes the node. When an mkldnn primitive was built (packed-BIN path) the
// base-class execute runs it; otherwise this reference loop applies the
// FakeQuantize formula element-wise over an NHWC FP32 tensor.
160 void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
// NOTE(review): the guard selecting between the mkldnn primitive path and the
// reference loop below (presumably `if (prim) { ... return; }`) is elided in
// this excerpt — confirm against the full file.
162 MKLDNNNode::execute(strm);
164 auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
165 auto &inputLowMemory = getParentEdgeAt(1)->getMemoryPtr();
166 auto &inputHighMemory = getParentEdgeAt(2)->getMemoryPtr();
167 auto &outputLowMemory = getParentEdgeAt(3)->getMemoryPtr();
168 auto &outputHighMemory = getParentEdgeAt(4)->getMemoryPtr();
169 auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
171 auto srcData = reinterpret_cast<const float *>(srcMemory->GetData());
172 auto inputLowData = reinterpret_cast<const float *>(inputLowMemory->GetData());
173 auto inputHighData = reinterpret_cast<const float *>(inputHighMemory->GetData());
174 auto outputLowData = reinterpret_cast<const float *>(outputLowMemory->GetData());
175 auto outputHighData = reinterpret_cast<const float *>(outputHighMemory->GetData());
176 auto dstData = reinterpret_cast<float *>(dstMemory->GetData());
// Skip any padding offset mkldnn placed before the first valid element.
178 srcData += srcMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
179 inputLowData += inputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
180 inputHighData += inputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
181 outputLowData += outputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
182 outputHighData += outputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
183 dstData += dstMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
// batchToProcess() honors dynamic-batch: may process fewer than N full dims.
185 size_t N = static_cast<size_t>(batchToProcess());
186 size_t C = static_cast<size_t>(srcMemory->GetDims()[1]);
187 size_t H = static_cast<size_t>(srcMemory->GetDims()[2]);
188 size_t W = static_cast<size_t>(srcMemory->GetDims()[3]);
// Each range tensor is either per-channel (size C on its channel axis) or a
// broadcast scalar; detect which by comparing its channel dim to C.
190 int inputLowAxis = inputLowMemory->GetDims().size() == 1 ? 0 : 1;
191 bool isInputLowBroadcasted = inputLowMemory->GetDims()[inputLowAxis] != C;
193 int inputHighAxis = inputHighMemory->GetDims().size() == 1 ? 0 : 1;
194 bool isInputHighBroadcasted = inputHighMemory->GetDims()[inputHighAxis] != C;
196 int outputLowAxis = outputLowMemory->GetDims().size() == 1 ? 0 : 1;
197 bool isOutputLowBroadcasted = outputLowMemory->GetDims()[outputLowAxis] != C;
199 int outputHighAxis = outputHighMemory->GetDims().size() == 1 ? 0 : 1;
200 bool isOutputHighBroadcasted = outputHighMemory->GetDims()[outputHighAxis] != C;
// Loop order matches the NHWC layout selected in the primitive descriptor:
// idx walks memory linearly with channels innermost.
202 for (int n = 0; n < N; n++) {
203 for (int h = 0; h < H; h++) {
204 for (int w = 0; w < W; w++) {
205 for (int c = 0; c < C; c++) {
206 size_t idx = n * H * W * C + h * W * C + w * C + c;
208 float inputLow = inputLowData[isInputLowBroadcasted ? 0 : c];
209 float inputHigh = inputHighData[isInputHighBroadcasted ? 0 : c];
210 float outputLow = outputLowData[isOutputLowBroadcasted ? 0 : c];
211 float outputHigh = outputHighData[isOutputHighBroadcasted ? 0 : c];
// FakeQuantize: clamp below inputLow / above inputHigh, otherwise snap to one
// of `levels` evenly spaced steps and rescale into [outputLow, outputHigh].
// NOTE(review): values exactly equal to inputHigh take the quantized branch
// (condition is `>`), while inputLow uses `<=` — asymmetric by design here.
213 if (srcData[idx] <= inputLow)
214 dstData[idx] = outputLow;
215 else if (srcData[idx] > inputHigh)
216 dstData[idx] = outputHigh;
218 dstData[idx] = roundf((srcData[idx] - inputLow) / (inputHigh - inputLow) * (levels - 1)) /
219 (levels - 1) * (outputHigh - outputLow) + outputLow;
// Node-factory hook: true iff this node instance really is a Quantize node.
227 bool MKLDNNQuantizeNode::created() const {
228 return getType() == Quantize;