// Copyright 2016-2018 Intel Corporation.
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you (End User License Agreement for the Intel(R) Software
// Development Products (Version May 2017)). Unless the License provides
// otherwise, you may not use, modify, copy, publish, distribute, disclose or
// transmit this software or the related documents without Intel's prior
// written permission.
//
// This software and the related documents are provided as is, with no
// express or implied warranties, other than those that are expressly
// stated in the License.
#include <vector>

#include <gtest/gtest.h>
#include <inference_engine/layer_transform.hpp>
#include <gna-api-types-xnn.h>
#include "gna_plugin/quantization/model_quantizer.hpp"
#include "gna_plugin/quantization/layer_quantizer.hpp"
#include "gna_matcher.hpp"
using namespace InferenceEngine;
using namespace GNAPluginNS;
using namespace GNATestIRs;
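// Fixture for I16 quantisation tests: quantises single layers and whole networks
// with the QuantI16 descriptor and, further below, uses the gna_matcher DSL to
// check which helper layers the GNA plugin inserts during propagate_forward.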
class I16QuantisationTest : public GNATest {
 protected:
    LayersQuantizer<QuantI16> lc = LayersQuantizer<QuantI16>(1.0f);

    InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
        auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
        transformLayer(newLayer, lc);
        return newLayer;
    }

    void SetUp() override {
    }
    template <class T>
    T setWeights(T blob) {
        blob->allocate();
        // the actual quantisation algorithm is involved - provide weights that will be quantised with a scale factor of 1
        for (auto && w : *blob) {
            w = MAX_VAL_2B_WEIGHT;
        }
        return blob;
    }
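    // Overload for U8 weight blobs: in these tests the raw bytes of the U8 blob are
    // treated as FP32 values, hence byteSize() / 4 (sizeof(float)) elements are written.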
    TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
        blob->allocate();
        auto buf = blob->buffer();
        auto ptr = buf.as<float*>();

        for (int i = 0; i != blob->byteSize() / 4; i++) {
            ptr[i] = MAX_VAL_2B_WEIGHT;
        }
        return blob;
    }
};
// TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer){

    auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
    fc->_weights = setWeights(make_shared_blob<float>(Precision::FP32, {1, 1}));
    fillWeights(fc->_weights);
    fc->_biases = make_shared_blob<float>(Precision::FP32, Layout::NC, {1, 1});
    fc->_biases->allocate();
    fillWeights(fc->_biases);

    std::shared_ptr<Data> outData = std::make_shared<Data>("data", SizeVector({1, 1}), Precision::FP32, Layout::NC);
    fc->outData.push_back(outData);
    fc->insData.push_back(outData);

    ASSERT_NO_THROW(quantize(fc));
}
TEST_F(I16QuantisationTest, canQuantizeActivation){

    auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
    sigmoid->params["value"] = "2";
    sigmoid->type = "Activation";

    ASSERT_NO_THROW(quantize(sigmoid));
}
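// After quantisation, the affine (FC) layer's output data should be 32-bit.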
TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights);
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineDataPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second->outData.front();

    ASSERT_EQ(affineDataPtr->precision, Precision::I32);
}
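// affineToMemoryModel() feeds an affine layer into a memory layer, i.e. the kind of
// recurrent connection an LSTM-like topology produces; quantisation must not throw on it.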
TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(affineToMemoryModel().data(), affineToMemoryModel().length()));

    auto weights = setWeights(make_shared_blob<uint8_t>(Precision::U8, C, {440}));
    //std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
    net_reader.SetWeights(weights);

    ASSERT_NO_THROW(q.quantize(net_reader.getNetwork(), 1000));
}
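// fillWeights(weights, {100}) fills all weights with 100; the test then expects both the
// weight scale factor and the destination (output) scale factor of the affine layer to be 100.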
TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights, {100});
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineLayerPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second;

    auto quantParams = getInjectedData<QuantizedLayerParams>(affineLayerPtr);

    ASSERT_FLOAT_EQ(quantParams->_dst_quant.scale, 100);
    ASSERT_FLOAT_EQ(quantParams->_weights_quant.scale, 100);
}
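// The tests below run models through the GNA plugin via the gna_matcher DSL and check which
// helper primitives (PWL activations, diagonal/copy affine layers, convolution, pooling)
// get inserted into the generated nnet.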
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
    assert_that().onInferModel(Fc2DOutputModel()).inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
    assert_that().onInferModel(Fc2DOutputModel()).inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters();
}
TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
    gna().onInferModel(Fc2DOutputModel())
        .withNanScaleFactor()
        .propagate_forward().throws();
}
TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
    gna().onInferModel(Fc2DOutputModel())
        .withInfScaleFactor()
        .propagate_forward().throws();
}
TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
    assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode()
        .gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){
        net.addOutput("Eltwise_8");
    }).inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
    assert_that().onInferModel(activationAfterSplitModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, DISABLED_SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
// TODO: requires implementation of an aligning filter for concat inputs and an improved
// quantization/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropagateForwardWithSuccess_AlignedFilterInsertion) {
    assert_that().onInferModel(doubleConcatModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
    assert_that().onInferModel(eltwiseSummModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
    assert_that().onInferModel(TFLeakyReluModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
    assert_that().onInferModel(maxpoolAfterRelu())
        .inNotCompactMode().gna().propagate_forward().called_with()
        .convolution_inserted_into_nnet().And()
        .pwl_inserted_into_nnet().And()
        .max_pooling_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
    assert_that().onInferModel(eltwiseMulModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}
TEST_F(I16QuantisationTest, multiple_inputs_supported) {
    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
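// Each network input can be given its own scale factor; the result should still match
// the floating point reference values.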
TEST_F(I16QuantisationTest, multiple_inputs_can_handle_individual_scale_factors) {
    std::vector<float> input_data  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    std::vector<float> input2_data = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
    std::vector<float> result      = {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5};

    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward()
        .called_with().inputScale("input_1", 2).And()
        .inputScale("input_2", 2).returns().result().filledWith(16384).that().equal_to(result);
}
TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
    assert_that().onInferModel(two_inputs_to_concat())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
    assert_that().onInferModel(scaleShiftAffineModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
    assert_that().onInferModel(clampFollowedByTanhModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
}
TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInDiagonalInsertion) {
    assert_that().onInferModel(eltwiseWithMemoryAndActivationInputModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
    // one Identity activation from the first FC, and one Identity activation for the eltwise
    assert_that().onInferModel(AffineWith2AffineOutputsModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}
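// Expected diagonal affine weights: the 8-element ScaleShift pattern {1.0f .. 8.0f} is
// broadcast across 5 rows and quantised to I16 with a step of 2048 per unit (1.0f -> 2048).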
TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {

    auto & affineWeights = storage<std::vector<uint16_t>>();

    affineWeights = {
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
    };

    assert_that().onInferModel(ScaleShift3DModel()).withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}
// TODO: this mode is not required in real-life scenarios so far
TEST_F(I16QuantisationTest, DISABLED_AffineWithOutputToMemoryAndToAnotherNode_ResultInCopyInsertion) {
    assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode().gna().propagate_forward()
        .called_with().copy_inserted_into_nnet();
}
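// The two DISABLED tests below check that the weights of an affine layer following a
// convolution end up in the same layout whether the IR contains an explicit Permute layer
// or the plugin has to transpose the weights itself.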
TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is least likely that width and height are both multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    // here the weights are transposed
    save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);

    // here the weights shouldn't be transposed
    assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}
TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is least likely that width and height are both multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);

    assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
}