// Copyright 2016-2018 Intel Corporation.
// This software and the related documents are Intel copyrighted materials,
// and your use of them is governed by the express license under which they
// were provided to you (End User License Agreement for the Intel(R) Software
// Development Products (Version May 2017)). Unless the License provides
// otherwise, you may not use, modify, copy, publish, distribute, disclose or
// transmit this software or the related documents without Intel's prior
// written permission.
//
// This software and the related documents are provided as is, with no
// express or implied warranties, other than those that are expressly
// stated in the License.
#include <vector>

#include <gtest/gtest.h>
#include <inference_engine/layer_transform.hpp>
#include <gna-api-types-xnn.h>
#include "gna_plugin/quantization/model_quantizer.hpp"
#include "gna_plugin/quantization/layer_quantizer.hpp"
#include "gna_matcher.hpp"
using namespace InferenceEngine;
using namespace GNAPluginNS;
using namespace GNATestIRs;
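// Fixture for I16 quantisation tests: quantises single layers and whole networks
// with the QuantI16 descriptor and, further below, uses the gna_matcher DSL to
// check which helper layers the GNA plugin inserts during propagate_forward.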
class I16QuantisationTest : public GNATest {
 protected:
    LayersQuantizer<QuantI16> lc = LayersQuantizer<QuantI16>(1.0f);

    InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
        auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
        transformLayer(newLayer, lc);
        return newLayer;
    }

    void SetUp() override {
    }
    template <class T>
    T setWeights(T blob) {
        blob->allocate();
        // the actual quantisation algorithm is involved - provide weights that will be quantised with a scale factor of 1
        for (auto && w : *blob) {
            w = MAX_VAL_2B_WEIGHT;
        }
        return blob;
    }
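    // Overload for U8 weight blobs: in these tests the raw bytes of the U8 blob are
    // treated as FP32 values, hence byteSize() / 4 (sizeof(float)) elements are written.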
    TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
        blob->allocate();
        auto buf = blob->buffer();
        auto ptr = buf.as<float*>();

        for (int i = 0; i != blob->byteSize() / 4; i++) {
            ptr[i] = MAX_VAL_2B_WEIGHT;
        }
        return blob;
    }
};
// TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer){

    auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
    fc->_weights = setWeights(make_shared_blob<float>(Precision::FP32, {1, 1}));
    fillWeights(fc->_weights);
    fc->_biases = make_shared_blob<float>(Precision::FP32, Layout::NC, {1, 1});
    fc->_biases->allocate();
    fillWeights(fc->_biases);

    std::shared_ptr<Data> outData = std::make_shared<Data>("data", SizeVector({1, 1}), Precision::FP32, Layout::NC);
    fc->outData.push_back(outData);
    fc->insData.push_back(outData);

    ASSERT_NO_THROW(quantize(fc));
}
TEST_F(I16QuantisationTest, canQuantizeActivation){

    auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
    sigmoid->params["value"] = "2";
    sigmoid->type = "Activation";

    ASSERT_NO_THROW(quantize(sigmoid));
}
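// After quantisation, the affine (FC) layer's output data should be 32-bit.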
TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights);
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineDataPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second->outData.front();

    ASSERT_EQ(affineDataPtr->precision, Precision::I32);
}
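// affineToMemoryModel() feeds an affine layer into a memory layer, i.e. the kind of
// recurrent connection an LSTM-like topology produces; quantisation must not throw on it.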
TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(affineToMemoryModel().data(), affineToMemoryModel().length()));

    auto weights = setWeights(make_shared_blob<uint8_t>(Precision::U8, C, {440}));
    //std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
    net_reader.SetWeights(weights);

    ASSERT_NO_THROW(q.quantize(net_reader.getNetwork(), 1000));
}
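// fillWeights(weights, {100}) fills all weights with 100; the test then expects both the
// weight scale factor and the destination (output) scale factor of the affine layer to be 100.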
TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){

    ModelQuantizer<QuantI16> q;

    CNNNetReader net_reader;
    ASSERT_NO_THROW(net_reader.ReadNetwork(Fc2DOutputModel().data(), Fc2DOutputModel().length()));

    auto weights = make_shared_blob<uint8_t>(Precision::U8, C, {440});
    weights->allocate();
    fillWeights(weights, {100});
    net_reader.SetWeights(weights);

    auto newNet = q.quantize(net_reader.getNetwork(), 1000);
    InputsDataMap inputs;
    newNet->getInputsInfo(inputs);
    auto affineLayerPtr = inputs.begin()->second->getInputData()->inputTo.begin()->second;

    auto quantParams = getInjectedData<QuantizedLayerParams>(affineLayerPtr);

    ASSERT_FLOAT_EQ(quantParams->_dst_quant.scale, 100);
    ASSERT_FLOAT_EQ(quantParams->_weights_quant.scale, 100);
}
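// The tests below run models through the GNA plugin via the gna_matcher DSL and check which
// helper primitives (PWL activations, diagonal/copy affine layers, convolution, pooling)
// get inserted into the generated nnet.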
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
    assert_that().onInferModel(Fc2DOutputModel()).inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
    assert_that().onInferModel(Fc2DOutputModel()).inNotCompactMode()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters();
}
TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
    gna().onInferModel(Fc2DOutputModel())
        .withNanScaleFactor()
        .propagate_forward().throws();
}
TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
    gna().onInferModel(Fc2DOutputModel())
        .withInfScaleFactor()
        .propagate_forward().throws();
}
TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
    assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode()
        .gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){
        net.addOutput("Eltwise_8");
    }).inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
    assert_that().onInferModel(eltwiseToMemoryModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
    assert_that().onInferModel(activationAfterSplitModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, DISABLED_SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
// TODO: requires implementation of an aligning filter for concat inputs and an improved
// quantization/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropagateForwardWithSuccess_AlignedFilterInsertion) {
    assert_that().onInferModel(doubleConcatModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
    assert_that().onInferModel(eltwiseSummModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
    assert_that().onInferModel(TFLeakyReluModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
    assert_that().onInferModel(maxpoolAfterRelu())
        .inNotCompactMode().gna().propagate_forward().called_with()
        .convolution_inserted_into_nnet().And()
        .pwl_inserted_into_nnet().And()
        .max_pooling_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
    assert_that().onInferModel(eltwiseMulModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}
TEST_F(I16QuantisationTest, multiple_inputs_supported) {
    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
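// Each network input can be given its own scale factor; the result should still match
// the floating point reference values.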
TEST_F(I16QuantisationTest, multiple_inputs_can_handle_individual_scale_factors) {
    std::vector<float> input_data  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
    std::vector<float> input2_data = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
    std::vector<float> result      = {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5};

    assert_that().onInferModel(two_inputs_to_affine())
        .inNotCompactMode().gna().propagate_forward()
        .called_with().inputScale("input_1", 2).And()
        .inputScale("input_2", 2).returns().result().filledWith(16384).that().equal_to(result);
}
TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
    assert_that().onInferModel(two_inputs_to_concat())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
    assert_that().onInferModel(scaleShiftAffineModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
    assert_that().onInferModel(clampFollowedByTanhModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
}
TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInDiagonalInsertion) {
    assert_that().onInferModel(eltwiseWithMemoryAndActivationInputModel())
        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet().once();
}
TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
    // one Identity activation from the first FC, and one Identity activation for the eltwise
    assert_that().onInferModel(AffineWith2AffineOutputsModel())
        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
}
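// Expected diagonal affine weights: the 8-element ScaleShift pattern {1.0f .. 8.0f} is
// broadcast across 5 rows and quantised to I16 with a step of 2048 per unit (1.0f -> 2048).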
TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {

    auto & affineWeights = storage<std::vector<uint16_t>>();

    affineWeights = {
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
    };

    assert_that().onInferModel(ScaleShift3DModel()).withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}
// TODO: this mode is not required in real-life scenarios so far
TEST_F(I16QuantisationTest, DISABLED_AffineWithOutputToMemoryAndToAnotherNode_ResultInCopyInsertion) {
    assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode().gna().propagate_forward()
        .called_with().copy_inserted_into_nnet();
}
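// The two DISABLED tests below check that the weights of an affine layer following a
// convolution end up in the same layout whether the IR contains an explicit Permute layer
// or the plugin has to transpose the weights itself.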
TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is least likely that width and height are both multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    // here the weights are transposed
    save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);

    // here the weights shouldn't be transposed
    assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
}
TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
    auto & affineWeights = storage<std::vector<uint16_t>>();

    // it is least likely that width and height are both multiples of 7
    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

    save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);

    assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
}