// inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_quantize_node.h"
#include "desc_iterator.hpp"
#include <ie_layers.h>
#include <cmath>  // roundf
#include <string>
#include <vector>
#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_memcpy.h>
#include "details/caseless.hpp"

using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace InferenceEngine;
using namespace InferenceEngine::details;

MKLDNNQuantizeNode::MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}

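// Validates the Quantize layer configuration: FP32 input, levels > 1, five
// input edges (data, input low, input high, output low, output high), a 4D
// data input, and 1D or 4D range inputs; prepares the packed path if possible.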
void MKLDNNQuantizeNode::getSupportedDescriptors() {
    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
    if (precision != InferenceEngine::Precision::FP32)
        THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only FP32 precision";

    auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(getCnnLayer().get());
    if (quantizeLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert Quantize layer " << getName();

    levels = quantizeLayer->levels;
    if (levels <= 1)
        THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only parameter levels > 1";

    if (getParentEdges().size() != 5)
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

    if (getParentEdgeAt(0)->getDims().ndims() != 4) {
        THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only 4D input at edge 0";
    }

    for (int i = 1; i < 5; i++) {
        if (getParentEdgeAt(i)->getDims().ndims() != 1 && getParentEdgeAt(i)->getDims().ndims() != 4) {
            THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only 1D or 4D inputs at edge " << i;
        }
    }

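    // If the only consumer is a BinaryConvolution node, the output can be
    // stored packed (precision BIN) and produced by an mkldnn binarization
    // primitive instead of the reference loop in execute().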
    canStorePacked = getChildEdges().size() == 1 && getChildEdgeAt(0)->getChild()->getType() == BinaryConvolution;

    if (canStorePacked) {
        mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
        mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN);
        mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);

        MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), idt, memory::nhwc);
        MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), ddt, memory::nhwc);

        InferenceEngine::SizeVector weightDims;
        weightDims.push_back(getParentEdgeAt(0)->getDims()[1]);
        MKLDNNDims blocked_weightDims(weightDims);
        MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::x};

        std::shared_ptr<mkldnn::binarization_forward::desc> bin_conv_desc;
        bin_conv_desc.reset(new binarization_forward::desc(prop_kind::forward_scoring, algorithm::binarization_depthwise,
                                                           in_candidate, wgh_candidate, out_candidate));

        descs.emplace_back(bin_conv_desc);

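        // Collect per-channel binarization thresholds from the input-low blob;
        // a scalar input low is broadcast across all channels.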
        InferenceEngine::SizeVector dims;
        dims.push_back(getParentEdgeAt(0)->getDims()[1]);

        auto inputLowBlob = dynamic_cast<TBlob<float>*>(getParentEdgeAt(1)->getParent()->getCnnLayer()->blobs["custom"].get());
        if (inputLowBlob == nullptr)
            THROW_IE_EXCEPTION << "Cannot get input low blob for Quantize layer " << getName();

        auto inputLowData = inputLowBlob->buffer().as<float*>();
        int inputLowAxis = getParentEdgeAt(1)->getDims().ndims() == 1 ? 0 : 1;
        bool isInputLowBroadcasted = getParentEdgeAt(1)->getDims()[inputLowAxis] != dims[0];

        for (size_t i = 0; i < dims[0]; i++) {
            binarizationThresholds.push_back(inputLowData[isInputLowBroadcasted ? 0 : i]);
        }
    }
}

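// Registers the supported layouts: the reference nhwc configuration plus, when
// the output can be packed, one configuration per available mkldnn
// binarization implementation.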
void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
    if (!supportedPrimitiveDescriptors.empty())
        return;

    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
    auto outputDataType = canStorePacked ? MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN)
                                         : MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);

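    // Helper that builds a LayerConfig: the data input and the output use the
    // given format, while the four range inputs use x (1D) or nchw (4D).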
    auto same = [&] (memory::format fmt, impl_desc_type impl) -> PrimitiveDescInfo {
        InferenceEngine::LayerConfig config;
        config.dynBatchSupport = true;
        for (size_t i = 0; i < getParentEdges().size(); i++) {
            InferenceEngine::DataConfig dataConfig;
            dataConfig.inPlace = -1;
            dataConfig.constant = false;

            if (i == 0) {
                dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt);
            } else {
                dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType,
                        getParentEdgeAt(i)->getDims().ndims() == 1 ? memory::x : memory::nchw);
            }
            config.inConfs.push_back(dataConfig);
        }

        InferenceEngine::DataConfig dataConfig;
        dataConfig.inPlace = -1;
        dataConfig.constant = false;
        dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt);
        config.outConfs.push_back(dataConfig);
        return {config, impl};
    };

    supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));

    if (canStorePacked) {
        primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
        do {
            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
            supportedPrimitiveDescriptors.push_back(same(memory::nhwc, impl_type));
        } while (itpd.next());
    }
}

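// On the packed path, creates the mkldnn binarization primitive and uploads the
// collected per-channel thresholds as an internal blob; the reference path
// keeps prim empty and falls through to the loop in execute().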
void MKLDNNQuantizeNode::createPrimitive() {
    if (prim)
        return;

    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Destination memory isn't allocated.";
    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
        THROW_IE_EXCEPTION << "Input memory isn't allocated.";
    if (getSelectedPrimitiveDescriptor() == nullptr)
        THROW_IE_EXCEPTION << "Preferable primitive descriptor isn't set.";

    if (canStorePacked) {
        auto prim_desc = createPrimitiveDescriptor<binarization_forward::primitive_desc, binarization_forward::desc>();

        MKLDNNMemoryDesc binarizationDataDesc = {{getParentEdgeAt(0)->getDims()[1]}, memory::f32, memory::x};
        auto binarizationDataMem = std::make_shared<MKLDNNMemory>(getEngine());
        binarizationDataMem->Create(binarizationDataDesc, binarizationThresholds.data());
        internalBlobMemory.push_back(binarizationDataMem);

        prim.reset(new binarization_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                            internalBlobMemory[0]->GetPrimitive(),
                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
    }
}

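// Reference quantization path: values outside [inputLow, inputHigh] saturate
// to outputLow/outputHigh; values inside are snapped to one of `levels` evenly
// spaced points:
//   dst = roundf((src - inputLow) / (inputHigh - inputLow) * (levels - 1))
//         / (levels - 1) * (outputHigh - outputLow) + outputLow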
void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
    if (prim) {
        MKLDNNNode::execute(strm);
    } else {
        auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
        auto &inputLowMemory = getParentEdgeAt(1)->getMemoryPtr();
        auto &inputHighMemory = getParentEdgeAt(2)->getMemoryPtr();
        auto &outputLowMemory = getParentEdgeAt(3)->getMemoryPtr();
        auto &outputHighMemory = getParentEdgeAt(4)->getMemoryPtr();
        auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();

        auto srcData = reinterpret_cast<const float *>(srcMemory->GetData());
        auto inputLowData = reinterpret_cast<const float *>(inputLowMemory->GetData());
        auto inputHighData = reinterpret_cast<const float *>(inputHighMemory->GetData());
        auto outputLowData = reinterpret_cast<const float *>(outputLowMemory->GetData());
        auto outputHighData = reinterpret_cast<const float *>(outputHighMemory->GetData());
        auto dstData = reinterpret_cast<float *>(dstMemory->GetData());

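        // Skip the padded offset at the start of each memory block.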
        srcData += srcMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
        inputLowData += inputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
        inputHighData += inputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
        outputLowData += outputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
        outputHighData += outputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
        dstData += dstMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;

        size_t N = static_cast<size_t>(batchToProcess());
        size_t C = static_cast<size_t>(srcMemory->GetDims()[1]);
        size_t H = static_cast<size_t>(srcMemory->GetDims()[2]);
        size_t W = static_cast<size_t>(srcMemory->GetDims()[3]);

        // A range input is broadcast when it holds a single value rather than
        // one value per channel.
        int inputLowAxis = inputLowMemory->GetDims().size() == 1 ? 0 : 1;
        bool isInputLowBroadcasted = inputLowMemory->GetDims()[inputLowAxis] != C;

        int inputHighAxis = inputHighMemory->GetDims().size() == 1 ? 0 : 1;
        bool isInputHighBroadcasted = inputHighMemory->GetDims()[inputHighAxis] != C;

        int outputLowAxis = outputLowMemory->GetDims().size() == 1 ? 0 : 1;
        bool isOutputLowBroadcasted = outputLowMemory->GetDims()[outputLowAxis] != C;

        int outputHighAxis = outputHighMemory->GetDims().size() == 1 ? 0 : 1;
        bool isOutputHighBroadcasted = outputHighMemory->GetDims()[outputHighAxis] != C;

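        // Element indexing below assumes the nhwc layout registered in
        // initSupportedPrimitiveDescriptors.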
        for (size_t n = 0; n < N; n++) {
            for (size_t h = 0; h < H; h++) {
                for (size_t w = 0; w < W; w++) {
                    for (size_t c = 0; c < C; c++) {
                        size_t idx = n * H * W * C + h * W * C + w * C + c;

                        float inputLow = inputLowData[isInputLowBroadcasted ? 0 : c];
                        float inputHigh = inputHighData[isInputHighBroadcasted ? 0 : c];
                        float outputLow = outputLowData[isOutputLowBroadcasted ? 0 : c];
                        float outputHigh = outputHighData[isOutputHighBroadcasted ? 0 : c];

                        if (srcData[idx] <= inputLow)
                            dstData[idx] = outputLow;
                        else if (srcData[idx] > inputHigh)
                            dstData[idx] = outputHigh;
                        else
                            dstData[idx] = roundf((srcData[idx] - inputLow) / (inputHigh - inputLow) * (levels - 1)) /
                                           (levels - 1) * (outputHigh - outputLow) + outputLow;
                    }
                }
            }
        }
    }
}

bool MKLDNNQuantizeNode::created() const {
    return getType() == Quantize;
}