a2603aff47f8296cd0e9e9b1358f0ca705cfeca3
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / tests / test_cases / fused_conv_eltwise_gpu_test.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/memory.hpp"
20 #include <api/input_layout.hpp>
21 #include "api/convolution.hpp"
22 #include "api/eltwise.hpp"
23 #include "api/reorder.hpp"
24 #include <api/topology.hpp>
25 #include <api/network.hpp>
26 #include <api/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include <api/data.hpp>
29
30 #include <api_extension/fused_conv_eltwise.hpp>
31
32 #include <cassert>
33 #include <cmath>
34 #include <gmock/gmock.h>
35 #include <limits>
36
37 using namespace cldnn;
38 using namespace tests;
39 using namespace testing;
40
41 TEST(fused_conv_eltwise, basic_0)
42 {
43     const auto& engine = get_test_engine();
44
45     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
46     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
47
48     set_values(input, {
49         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
50         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
51     });
52
53     topology topology(
54         input_layout("input", input.get_layout()),
55         data("weights", weights),
56         convolution("conv", "input", { "weights" }),
57         eltwise("eltwise", "input", "conv", eltwise_mode::sum),
58         reorder("out", "eltwise", format::bfyx, data_types::f32));
59
60     build_options opt;
61     opt.set_option(build_option::optimize_data(true));
62     network network(engine, topology, opt);
63     network.set_input_data("input", input);
64
65     auto outputs = network.execute();
66     EXPECT_EQ(outputs.size(), size_t(1));
67     EXPECT_EQ(outputs.begin()->first, "out");
68
69     auto output = outputs.begin()->second.get_memory();
70     auto&& out_layout = output.get_layout();
71
72     EXPECT_EQ(out_layout.format, format::bfyx);
73     EXPECT_EQ(out_layout.size.batch[0], 1);
74     EXPECT_EQ(out_layout.size.feature[0], 1);
75     EXPECT_EQ(out_layout.size.spatial[0], 4);
76     EXPECT_EQ(out_layout.size.spatial[1], 5);
77 }
78
79 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
80 {
81     const auto& engine = get_test_engine();
82
83     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
84     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
85
86     set_values(input, {
87         1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
88         -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
89         });
90
91     topology topology(
92         input_layout("input", input.get_layout()),
93         data("weights", weights),
94         convolution("conv", "input", { "weights" }),
95         eltwise("out", "input", "conv", eltwise_mode::sum));
96
97     build_options opt;
98     opt.set_option(build_option::optimize_data(true));
99     network network(engine, topology, opt);
100     network.set_input_data("input", input);
101
102     auto outputs = network.execute();
103     EXPECT_EQ(outputs.size(), size_t(1));
104     EXPECT_EQ(outputs.begin()->first, "out");
105
106     auto output = outputs.begin()->second.get_memory();
107     auto&& out_layout = output.get_layout();
108
109     EXPECT_EQ(out_layout.format, format::bfyx);
110     EXPECT_EQ(out_layout.size.batch[0], 1);
111     EXPECT_EQ(out_layout.size.feature[0], 1);
112     EXPECT_EQ(out_layout.size.spatial[0], 4);
113     EXPECT_EQ(out_layout.size.spatial[1], 5);
114 }
115
// Test harness for the fused_conv_eltwise primitive. Each add_feature() call
// registers one independent feature (channel) worth of data; do_test() then
// builds a 1 x n_features x 4 x 1 convolution (3x1 kernel) whose output is
// element-wise summed with "sum_input", and compares the result against the
// expected pre-ReLU values clamped into OutputTy's range.
template<typename InputTy,
         typename OutputTy>
class FusedConvTest : public testing::Test
{
protected:
    // A float/float instantiation exercises the pure floating-point path;
    // any other InputTy is treated as a quantized path: int8 weights and an
    // int32 pre-activation accumulator.
    static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
    using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
    using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
    using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;

    topology the_topology;

    // Per-feature data accumulated by add_feature() and consumed by do_test().
    std::vector<InputTy> input_values;
    std::vector<WeightsTy> weights_values;
    std::vector<BiasesTy> biases_values;
    // Note, not all of the quantization/calibration factors are used in all the
    // tests. However, I didn't come up with a way to correctly reflect that
    // while unifying the boilerplate testing code.
    // Sentinel for factors a particular test doesn't use (NaN so accidental
    // use would be noticeable in results).
    static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
    std::vector<float> input_quant_factors_values;
    std::vector<float> calibration_values;

    // Eltw part.
    std::vector<InputTy> non_conv_input_values;
    std::vector<float> eltw_output_calibration_values;
    std::vector<OutputPreActivationTy> output_pre_relu;

    // Registers one feature's worth of data: 4 input elements (4x1 spatial),
    // a 3-element (3x1) kernel, one bias and per-feature factors, plus the
    // 2 eltwise summands and 2 expected pre-ReLU outputs (conv output is 2x1).
    void add_feature(std::vector<InputTy> input,
                     std::vector<WeightsTy> weights,
                     BiasesTy bias,
                     float input_quant_factor,
                     float conv_calibration,
                     std::vector<InputTy> non_conv_input,
                     float eltw_output_calibration,
                     std::vector<OutputPreActivationTy> output)
    {
        assert(non_conv_input.size() == output.size());
        input_values.insert(input_values.end(), input.begin(), input.end());
        weights_values.insert(
            weights_values.end(), weights.begin(), weights.end());
        biases_values.push_back(bias);
        input_quant_factors_values.push_back(input_quant_factor);
        calibration_values.push_back(conv_calibration);
        non_conv_input_values.insert(non_conv_input_values.end(),
                                     non_conv_input.begin(),
                                     non_conv_input.end());
        eltw_output_calibration_values.push_back(eltw_output_calibration);
        output_pre_relu.insert(
            output_pre_relu.end(), output.begin(), output.end());
    }

    // Builds the topology around `fused_prim` (which must be named
    // "fused_conv" and reference the data nodes added below), executes the
    // network, and checks the output against output_pre_relu.
    void do_test(const fused_conv_eltwise& fused_prim)
    {
        const auto& engine = get_test_engine();

        // One feature per add_feature() call.
        int n_features = static_cast<int>(biases_values.size());

        auto input_shape = tensor(1, n_features, 4, 1);
        auto weights_shape = tensor(n_features, n_features, 3, 1);
        auto biases_shape = tensor(1, n_features, 1, 1);
        auto sum_input_shape = tensor(1, n_features, 2, 1);

        auto input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
        auto weights = memory::allocate(
            engine,
            {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});

        auto biases = memory::allocate(
            engine,
            {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
        auto input_quant_factors = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto conv_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});
        auto sum_input = memory::allocate(
            engine,
            {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
        auto eltw_output_calibration = memory::allocate(
            engine, {data_types::f32, format::bfyx, biases_shape});

        set_values(input, input_values);
        // Expand the per-feature 3x1 kernels into a full
        // n_features x n_features weight tensor with zeros off the diagonal,
        // so each output feature depends only on its matching input feature
        // and the features stay independent of each other.
        std::vector<WeightsTy> post_processed_weights_values(n_features
                                                             * n_features * 3);
        for (int output_feature = 0; output_feature < n_features; ++output_feature)
            for (int input_feature = 0; input_feature < n_features;
                 ++input_feature)
                for (int x = 0; x < 3; ++x)
                {
                    int idx =
                        output_feature * n_features * 3 + input_feature * 3 + x;
                    if (input_feature == output_feature)
                        post_processed_weights_values[idx] =
                            weights_values[input_feature * 3 + x];
                    else
                        post_processed_weights_values[idx] = 0;
                }
        set_values(weights, post_processed_weights_values);
        set_values(biases, biases_values);
        set_values(input_quant_factors, input_quant_factors_values);
        set_values(conv_output_calibration, calibration_values);
        set_values(sum_input, non_conv_input_values);
        set_values(eltw_output_calibration, eltw_output_calibration_values);

        the_topology.add(input_layout("input", input.get_layout()));
        the_topology.add(data("weights", weights));
        the_topology.add(data("biases", biases));
        the_topology.add(data("sum_input", sum_input));
        the_topology.add(data("input_quant_factors", input_quant_factors));
        the_topology.add(data("conv_output_calibration", conv_output_calibration));
        the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
        the_topology.add(fused_prim);

        build_options opts;
        // Keep the graph as-built; the primitive under test is already fused.
        opts.set_option(build_option::optimize_data(false));

        network network(engine, the_topology, opts);
        network.set_input_data("input", input);

        auto outputs = network.execute();

        auto output_memory = outputs.at("fused_conv").get_memory();
        auto output_layout = output_memory.get_layout();
        auto output_ptr = output_memory.pointer<OutputTy>();
        int y_size = output_layout.size.spatial[1];
        int x_size = output_layout.size.spatial[0];
        int f_size = output_layout.size.feature[0];
        int b_size = output_layout.size.batch[0];
        EXPECT_EQ(output_layout.format, format::bfyx);
        // 4x1 input through a 3x1 kernel at stride 1 yields a 2x1 output.
        EXPECT_EQ(y_size, 1);
        EXPECT_EQ(x_size, 2);
        EXPECT_EQ(f_size, n_features);
        EXPECT_EQ(b_size, 1);

        // Flat bfyx indexing; valid because y_size == 1 (asserted above).
        for (int f = 0; f < f_size; f++)
            for (int x = 0; x < x_size; ++x)
            {
                // printf("f: %d, x: %d\n", f, x);
                OutputPreActivationTy expected =
                    pre_relu_to_output(output_pre_relu[f * x_size + x]);
                auto actual = static_cast<OutputPreActivationTy>(
                    output_ptr[f * x_size + x]);
                expect_eq(expected, actual);
            }
    }

private:
    // Floating-point comparison: tolerance-based.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_floating_point<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_NEAR(lhs, rhs, 0.001f);
    }

    // Integral comparison: exact.
    template<typename T = OutputPreActivationTy>
    static typename std::enable_if<std::is_integral<T>::value>::type
    expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
    {
        EXPECT_EQ(lhs, rhs);
    }

    // Applies ReLU to the pre-activation value and clamps it into OutputTy's
    // representable range (models the final type conversion of the output).
    template <typename T>
    static T pre_relu_to_output(T pre_relu) {
      // No std::clamp before C++17 :(
      return std::min(
          static_cast<T>(std::numeric_limits<OutputTy>::max()),
          std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
                   std::max(static_cast<T>(0), pre_relu)));
    }
};
287
// Pure floating-point instantiation: weights/biases/accumulator are all float.
class FusedConvTest_all_float : public FusedConvTest<float, float>
{};
290
291 TEST_F(FusedConvTest_all_float, basic) {
292     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
293                 {2.0f, 0.0f, 1.0f},           // weights
294                 1.0f,                         // bias
295                 1.0f,                         // conv_input_quant
296                 1.0f,                         // conv_output_calibration
297                 {-10.0f, -10.0f},             // non_conv_input
298                 1.0f,                         // eltw_output_calibration
299                 {241.0f, 242.0f});            // output_pre_relu
300
301     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
302                 {2.0f, 0.0f, 1.0f},           // weights
303                 0.0f,                         // bias
304                 1.0f,                         // conv_input_quant
305                 1.0f,                         // conv_output_calibration
306                 {-10.0f, -11.0f},             // non_conv_input
307                 2.0f,                         // eltw_output_calibration
308                 {480.0f, 480.0f});            // output_pre_relu
309
310     do_test(fused_conv_eltwise("fused_conv",
311                                "input",
312                                "sum_input",
313                                eltwise_mode::sum,
314                                {"weights"},
315                                {"biases"},
316                                {"input_quant_factors"},
317                                {"conv_output_calibration"},
318                                1.0f, // conv_i_quantization_factor
319                                1.0f, // non_conv_scale
320                                "eltw_output_calibration",
321                                {{1, 1, 1, 1}}, // eltw_stride
322                                {1, 1, 1, 1},   // stride
323                                {0, 0, 0, 0},   // input_offset
324                                {1, 1, 1, 1},   // dilation
325                                false,          // conv_with_activation
326                                0.0f,           // con_activation_slp
327                                true,           // eltw_activation
328                                0.0f));         // eltw_activation_slp
329 }
330
// Float path, but the primitive under test omits conv output calibration.
class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
{};
333
334 TEST_F(FusedConvTest_no_conv_calibration, basic) {
335     // That might happen if both conv output and non-conv input happen to be
336     // normalized to the same dynamic range of if tensor-wise (instead of
337     // per-channel) calibration is used. Also, a similar thing might happen for
338     // a convolution with calibration without quantization (which is the real
339     // target of this test, needed for the Inference Engine).
340
341     // add_feature contains data for conv quantization/calibration, but the
342     // primitive won't use it. It's just much easier to unify different tests
343     // this way.
344     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
345                 {2.0f, 0.0f, 1.0f},           // weights
346                 1.0f,                         // bias
347                 1.0f,                         // conv_input_quant
348                 ignore,                       // conv_output_calibration
349                 {-10.0f, -10.0f},             // non_conv_input
350                 1.0f,                         // eltw_output_calibration
351                 {241.0f, 242.0f});            // output_pre_relu
352
353     add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
354                 {2.0f, 0.0f, 1.0f},           // weights
355                 0.0f,                         // bias
356                 1.0f,                         // conv_input_quant
357                 ignore,                       // conv_output_calibration
358                 {-10.0f, -11.0f},             // non_conv_input
359                 2.0f,                         // eltw_output_calibration
360                 {480.0f, 480.0f});            // output_pre_relu
361
362     do_test(fused_conv_eltwise("fused_conv",
363                                "input",
364                                "sum_input",
365                                eltwise_mode::sum,
366                                {"weights"},
367                                {"biases"},
368                                {"input_quant_factors"},
369                                {},   // conv_output_calibration
370                                1.0f, // conv_i_quantization_factor
371                                1.0f, // non_conv_scale
372                                "eltw_output_calibration",
373                                {{1, 1, 1, 1}}, // eltw_stride
374                                {1, 1, 1, 1},   // stride
375                                {0, 0, 0, 0},   // input_offset
376                                {1, 1, 1, 1},   // dilation
377                                false,          // conv_with_activation
378                                0.0f,           // con_activation_slp
379                                true,           // eltw_activation
380                                0.0f));         // eltw_activation_slp
381 }
382
// Quantized i8->i8 path; the non-conv input is scaled by a per-primitive factor.
class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
{};
385
TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
    // NOTE: The data in add_feature calls implicitly assumes this!
    const float non_conv_scale = 2.0f; // TODO: Need per-channel too?

    // Check that the output precision is `u8` indeed. If it was not, then 251
    // would either be rounded to 250 or 252. Ensure it's not the case and the
    // outputs actually differ.
    // NOTE(review): the fixture instantiates OutputTy = int8_t, so "u8" above
    // looks stale — verify which precision the kernel actually emits.
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
    add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});

    // Verify that activation is done before the final calibration+type
    // conversion (in other words, in higher precision than the output).
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
    add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});

    // Same but with non-unit calibration (just in case).
    add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {},   // conv_output_calibration
                               1.0f, // conv_i_quantization_factor
                               non_conv_scale, // non_conv_scale
                               "eltw_output_calibration",
                               {{1, 1, 1, 1}}, // eltw_stride
                               {1, 1, 1, 1},   // stride
                               {0, 0, 0, 0},   // input_offset
                               {1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f));         // eltw_activation_slp
}
424
// Quantized path with i8 inputs and u8 output (forced via optional_data_type).
class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
{};
427
428 TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
429     add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
430     add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});
431
432     do_test(fused_conv_eltwise("fused_conv",
433                                "input",
434                                "sum_input",
435                                eltwise_mode::sum,
436                                {"weights"},
437                                {"biases"},
438                                {},   // input_quant_factors
439                                {},   // conv_output_calibration
440                                1.0f, // conv_i_quantization_factor
441                                1.0f, // non_conv_scale
442                                "eltw_output_calibration",
443                                std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
444                                tensor{1, 1, 1, 1},   // stride
445                                tensor{0, 0, 0, 0},   // input_offset
446                                tensor{1, 1, 1, 1},   // dilation
447                                false,          // conv_with_activation
448                                0.0f,           // con_activation_slp
449                                true,           // eltw_activation
450                                0.0f,           // eltw_activation_slp
451                                padding(),
452                                optional_data_type{data_types::u8}));
453 }
454
// i8->u8 path without eltwise output calibration; the non-conv input is
// scaled by the primitive's non_conv_scale instead.
class FusedConvTest_i8_to_u8_no_eltw_calibration
    : public FusedConvTest<int8_t, uint8_t>
{};
458
TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
    // No eltwise output calibration is attached ({} below); the non-conv
    // input is pre-scaled by this factor before the sum instead.
    const float non_conv_scale = 1.0f / 3.0f;

    // Expected outputs fold together conv_input_quant (0.5) and
    // non_conv_input * non_conv_scale (-60 / 3 = -20); hence the
    // "conv/2 - 20" spelling below.
    add_feature({124, 124, 0, -4},             // input
                {2, 0, 1},                     // weights
                4,                             // bias
                0.5f,                          // conv_input_quant
                ignore,                        // conv_output_calibration
                {-60, -60},                    // non_conv_input
                ignore,                        // eltw_output_calibration
                {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu

    // Second feature exercises fractional intermediate values
    // (conv_input_quant and non_conv_scale are both 1/3).
    add_feature({3, 3, 1, 1}, // input
                {2, 0, 1},    // weights
                0,            // bias
                1.0f / 3.0f,  // conv_input_quant
                ignore,       // conv_output_calibration
                {1, 1},       // eltw_sum_input
                ignore,       // eltw_output_calibration
                // TODO: Do we really need that round? Should it be "3" instead?
                // { round(2.333) + round (0.333) }
                {2, 2}); // output_pre_relu

    do_test(fused_conv_eltwise("fused_conv",
                               "input",
                               "sum_input",
                               eltwise_mode::sum,
                               {"weights"},
                               {"biases"},
                               {"input_quant_factors"},
                               {}, // conv_output_calibration
                               1.0f, // conv_i_quantization_factor
                               non_conv_scale,
                               {},             // eltw_output_calibration
                               std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
                               tensor{1, 1, 1, 1},   // stride
                               tensor{0, 0, 0, 0},   // input_offset
                               tensor{1, 1, 1, 1},   // dilation
                               false,          // conv_with_activation
                               0.0f,           // con_activation_slp
                               true,           // eltw_activation
                               0.0f,           // eltw_activation_slp
                               padding(),
                               optional_data_type{data_types::u8}));
}