inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp
//
// Copyright 2016-2018 Intel Corporation.
//
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you (End User License Agreement for the Intel(R) Software
// Development Products (Version May 2017)). Unless the License provides
// otherwise, you may not use, modify, copy, publish, distribute, disclose or
// transmit this software or the related documents without Intel's prior
// written permission.
//
// This software and the related documents are provided as is, with no
// express or implied warranties, other than those that are expressly
// stated in the License.
//

#include <vector>
#include <gtest/gtest.h>
#include <inference_engine/layer_transform.hpp>
#include <gna-api-types-xnn.h>
#include "gna_plugin/quantization/model_quantizer.hpp"
#include "gna_plugin/quantization/layer_quantizer.hpp"
#include "gna_matcher.hpp"

using namespace InferenceEngine;
using namespace GNAPluginNS;
using namespace GNATestIRs;

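// Test fixture for I16 quantisation: holds a LayersQuantizer<QuantI16> with a
// scale factor of 1.0 and a helper that injects QuantizedLayerParams into a
// layer and applies the quantiser to it.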
class I16QuantisationTest : public GNATest {
 protected:
    LayersQuantizer<QuantI16> lc = LayersQuantizer<QuantI16>(1.0f);

    InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
        auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
        transformLayer(newLayer, lc);
        return newLayer;
    }

    void SetUp() override {
    }
};

template <class T>
T setWeights(T blob) {
    blob->allocate();
    // the actual quantisation algorithm is involved, so provide weights that
    // will be quantized with a scale factor of 1
    for (auto && w : *blob) {
        w = MAX_VAL_2B_WEIGHT;
    }
    return blob;
}

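// Specialisation for U8 weight blobs: the IR weights are FP32 values stored in
// a uint8_t blob, so the buffer is filled through a float pointer.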
template <>
TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
    blob->allocate();
    auto buf = blob->buffer();
    auto ptr = buf.as<float*>();

    for (int i = 0; i != blob->byteSize() / sizeof(float); i++) {
        ptr[i] = MAX_VAL_2B_WEIGHT;
    }
    return blob;
}

// TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer) {

    auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
    fc->_out_num = 9;
    fc->_weights = setWeights(make_shared_blob<float>(Precision::FP32, {1, 1}));
    fillWeights(fc->_weights);
    fc->_biases  = make_shared_blob<float>(Precision::FP32, Layout::NC, {1, 1});
    fc->_biases->allocate();
    fillWeights(fc->_biases);

    std::shared_ptr<Data> outData = std::make_shared<Data>("data", SizeVector({1, 1}), Precision::FP32, Layout::NC);
    fc->outData.push_back(outData);
    fc->insData.push_back(outData);

    ASSERT_NO_THROW(quantize(fc));
}
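
// One possible shape for the TODO above - a sketch only, the exact checks on
// the injected quantisation data are an assumption, not something this suite
// currently verifies:
//
//   auto quantized = quantize(fc);
//   auto quantData = getInjectedData<QuantizedLayerParams>(quantized);
//   ASSERT_NE(quantData, nullptr);
//   ASSERT_GT(quantData->_weights_quant.scale, 0.0f);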

TEST_F(I16QuantisationTest, canQuantizeActivation) {

    auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
    sigmoid->params["value"] = "2";
    sigmoid->type = "Activation";

    ASSERT_NO_THROW(quantize(sigmoid));
}

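// GNA affine primitives accumulate in 32-bit integers, so after I16
// quantisation the output data of the affine layer is expected to be I32.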
TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits) {

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights);
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineDataPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second->outData.front();

    ASSERT_EQ(affineDataPtr->precision, Precision::I32);
}

TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(affineToMemoryModel().data(), affineToMemoryModel().length()));

    auto weights = setWeights(make_shared_blob<uint8_t>(Precision::U8, C, {440}));
    //std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
    net_reader.SetWeights(weights);

    ASSERT_NO_THROW(q.quantize(net_reader.getNetwork(), 1000));
}

TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect) {

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights, {100});
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineLayerPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second;

    auto quantParams = getInjectedData<QuantizedLayerParams>(affineLayerPtr);

    ASSERT_FLOAT_EQ(quantParams->_dst_quant.scale, 100);
    ASSERT_FLOAT_EQ(quantParams->_weights_quant.scale, 100);
}

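// The tests below drive the plugin through the matcher DSL from gna_matcher.hpp:
// a model IR is inferred and expectations are placed on the primitives (PWL
// activations, diagonal/copy layers, pooling, weights) that the plugin inserts
// into the GNA network during propagate_forward.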
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
    assert_that()
        .onInferModel(Fc2DOutputModel())
        .inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
    assert_that()
        .onInferModel(Fc2DOutputModel())
        .inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters();
}

TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
    gna()
        .onInferModel(Fc2DOutputModel())
        .withNanScaleFactor()
        .propagate_forward().throws();
}

TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
    gna()
        .onInferModel(Fc2DOutputModel())
        .withInfScaleFactor()
        .propagate_forward().throws();
}

TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
    assert_that()
        .onInferModel(affineToMemoryModel())
        .inNotCompactMode()
        .gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){
            net.addOutput("Eltwise_8");
        }).inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
    assert_that().onInferModel(activationAfterSplitModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, DISABLED_SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}

// TODO: requires implementation of an aligning filter for concat inputs and an
// improved quantisation/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
    assert_that().onInferModel(doubleConcatModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
    assert_that().onInferModel(eltwiseSummModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}

TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
    assert_that().onInferModel(TFLeakyReluModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
    assert_that().onInferModel(maxpoolAfterRelu())
        .inNotCompactMode().gna().propagate_forward().called_with()
        .convolution_inserted_into_nnet()
        .And()
        .pwl_inserted_into_nnet()
        .And()
        .max_pooling_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
    assert_that().onInferModel(eltwiseMulModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}

TEST_F(I16QuantisationTest, multiple_inputs_supported) {
    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}

TEST_F(I16QuantisationTest, multiple_inputs_can_handle_individual_scale_factors) {
    std::vector<float> input_data  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    std::vector<float> input2_data = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
    std::vector<float> result      = {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5};

    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward()
        .called_with().inputScale("input_1", 2).And()
        .inputScale("input_2", 2).returns().result().filledWith(16384).that().equal_to(result);
}

TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
    assert_that().onInferModel(two_inputs_to_concat())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}

TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
    assert_that().onInferModel(scaleShiftAffineModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}

TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
    assert_that().onInferModel(clampFollowedByTanhModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
}

TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInDiagonalInsertion) {
    assert_that().onInferModel(eltwiseWithMemoryAndActivationInputModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().once();
}

TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
    // one Identity activation from the first FC, and one Identity activation for the eltwise
    assert_that().onInferModel(AffineWith2AffineOutputsModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}

TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {

    auto & affineWeights = storage<std::vector<uint16_t>>();

    affineWeights = {
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
    };

    assert_that().onInferModel(ScaleShift3DModel()).withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}

// TODO: this mode is not required in real-life scenarios so far
TEST_F(I16QuantisationTest, DISABLED_AffineWithOutputToMemoryAndToAnotherNode_ResultInCopyInsertion) {
    assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode().gna().propagate_forward().
        called_with().copy_inserted_into_nnet();
}

TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is unlikely that both width and height are multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    // here the weights are transposed
    save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);

    // here the weights shouldn't be transposed
    assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}

TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is unlikely that both width and height are multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);

    assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
}