2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/convolution.hpp"
22 #include "api/CPP/eltwise.hpp"
23 #include "api/CPP/reorder.hpp"
24 #include <api/CPP/topology.hpp>
25 #include <api/CPP/network.hpp>
26 #include <api/CPP/engine.hpp>
27 #include "test_utils/test_utils.h"
28 #include <api/CPP/data.hpp>
30 #include <api_extension/CPP/fused_conv_eltwise.hpp>
34 #include <gmock/gmock.h>
37 using namespace cldnn;
38 using namespace tests;
39 using namespace testing;
// Smoke test: build conv -> eltwise(sum with the network input) -> reorder
// with optimize_data enabled, so the conv+eltwise pair is eligible for
// fusion, and check that the single output "out" keeps the expected
// f32 bfyx layout and 1x1x4x5 dimensions.
41 TEST(fused_conv_eltwise, basic_0)
43 const auto& engine = get_test_engine();
// 1x1 feature map of 4x5 spatial size; weights are a single 1x1 kernel.
45 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
46 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
// Input values; the -15.f entries are interleaved sentinels among the data.
49 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
50 -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
// Topology: the eltwise consumes both the raw input and the conv output,
// and a final reorder ("out") pins the output format/type.
54 input_layout("input", input.get_layout()),
55 data("weights", weights),
56 convolution("conv", "input", { "weights" }),
57 eltwise("eltwise", "input", "conv", eltwise_mode::sum),
58 reorder("out", "eltwise", format::bfyx, data_types::f32));
// optimize_data(true) enables graph optimizations, including conv+eltwise fusion.
61 opt.set_option(build_option::optimize_data(true));
62 network network(engine, topology, opt);
63 network.set_input_data("input", input);
65 auto outputs = network.execute();
// Exactly one network output, and it must be the final reorder node.
66 EXPECT_EQ(outputs.size(), size_t(1));
67 EXPECT_EQ(outputs.begin()->first, "out");
69 auto output = outputs.begin()->second.get_memory();
70 auto&& out_layout = output.get_layout();
// Layout must survive fusion unchanged: bfyx, batch 1, 1 feature, 4x5 spatial.
72 EXPECT_EQ(out_layout.format, format::bfyx);
73 EXPECT_EQ(out_layout.size.batch[0], 1);
74 EXPECT_EQ(out_layout.size.feature[0], 1);
75 EXPECT_EQ(out_layout.size.spatial[0], 4);
76 EXPECT_EQ(out_layout.size.spatial[1], 5);
// Same conv + eltwise(sum) graph as basic_0, but here the eltwise node
// itself is the network output (id "out", no trailing reorder). The test
// verifies the network still produces a single, correctly-shaped output in
// this configuration (i.e. fusion must not break when conv/eltwise are
// outputs of the network).
80 TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
82 const auto& engine = get_test_engine();
84 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
85 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// Same input data as basic_0 (with -15.f sentinel values interleaved).
88 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f,
89 -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f
// Note: the eltwise is named "out" and is the last primitive — no reorder.
93 input_layout("input", input.get_layout()),
94 data("weights", weights),
95 convolution("conv", "input", { "weights" }),
96 eltwise("out", "input", "conv", eltwise_mode::sum));
99 opt.set_option(build_option::optimize_data(true));
100 network network(engine, topology, opt);
101 network.set_input_data("input", input);
103 auto outputs = network.execute();
104 EXPECT_EQ(outputs.size(), size_t(1));
105 EXPECT_EQ(outputs.begin()->first, "out");
107 auto output = outputs.begin()->second.get_memory();
108 auto&& out_layout = output.get_layout();
// Output layout must match the input: bfyx, batch 1, 1 feature, 4x5 spatial.
110 EXPECT_EQ(out_layout.format, format::bfyx);
111 EXPECT_EQ(out_layout.size.batch[0], 1);
112 EXPECT_EQ(out_layout.size.feature[0], 1);
113 EXPECT_EQ(out_layout.size.spatial[0], 4);
114 EXPECT_EQ(out_layout.size.spatial[1], 5);
// Typed test fixture for the fused_conv_eltwise primitive.
//
// InputTy selects the precision mode: for float inputs the whole pipeline is
// float; otherwise weights are int8 and biases / pre-activation accumulators
// are int32 (see the std::conditional aliases below).
// NOTE(review): the template parameter list continues on a line not visible
// in this chunk — the second parameter is used below as OutputTy (the type
// the network output is read back as).
117 template<typename InputTy,
119 class FusedConvTest : public testing::Test
// True when the test runs the all-float configuration.
122 static constexpr bool is_pure_float = std::is_same<InputTy, float>::value;
123 using OutputPreActivationTy = typename std::conditional<is_pure_float, float, int32_t>::type;
124 using WeightsTy = typename std::conditional<is_pure_float, float, int8_t>::type;
125 using BiasesTy = typename std::conditional<is_pure_float, float, int32_t>::type;
// Topology under construction; each TEST_F adds data via add_feature() and
// then hands the fused primitive to do_test().
127 topology the_topology;
// Per-feature test data, accumulated one feature (channel) at a time.
129 std::vector<InputTy> input_values;
130 std::vector<WeightsTy> weights_values;
131 std::vector<BiasesTy> biases_values;
132 // Note, not all of the quantization/calibration factors are used in all the
133 // tests. However, I didn't come up with a way to correctly reflect that
134 // while unifying the boilerplate testing code.
// Placeholder for factors a particular test does not consume.
135 static constexpr float ignore = std::numeric_limits<float>::quiet_NaN();
136 std::vector<float> input_quant_factors_values;
137 std::vector<float> calibration_values;
// Second (non-convolution) eltwise operand and its output calibration.
140 std::vector<InputTy> non_conv_input_values;
141 std::vector<float> eltw_output_calibration_values;
// Expected output values BEFORE the final ReLU/clamp (see pre_relu_to_output).
142 std::vector<OutputPreActivationTy> output_pre_relu;
// Appends one feature's worth of test data: 4 input values, a 1x3 weight
// row, quantization/calibration factors, the 2 eltwise-input values and the
// 2 expected pre-activation outputs. non_conv_input and output must be the
// same length (one expected value per eltwise input element).
144 void add_feature(std::vector<InputTy> input,
145 std::vector<WeightsTy> weights,
147 float input_quant_factor,
148 float conv_calibration,
149 std::vector<InputTy> non_conv_input,
150 float eltw_output_calibration,
151 std::vector<OutputPreActivationTy> output)
153 assert(non_conv_input.size() == output.size());
154 input_values.insert(input_values.end(), input.begin(), input.end());
155 weights_values.insert(
156 weights_values.end(), weights.begin(), weights.end());
157 biases_values.push_back(bias);
158 input_quant_factors_values.push_back(input_quant_factor);
159 calibration_values.push_back(conv_calibration);
160 non_conv_input_values.insert(non_conv_input_values.end(),
161 non_conv_input.begin(),
162 non_conv_input.end());
163 eltw_output_calibration_values.push_back(eltw_output_calibration);
164 output_pre_relu.insert(
165 output_pre_relu.end(), output.begin(), output.end());
// Builds the full topology around the supplied fused primitive, runs the
// network, and compares every output element against the expected
// (clamped/activated) values registered via add_feature().
168 void do_test(const fused_conv_eltwise& fused_prim)
170 const auto& engine = get_test_engine();
// One feature per add_feature() call made by the test.
172 int n_features = static_cast<int>(biases_values.size());
// Shapes: input is 1 x features x 4 x 1; a 3x1 kernel yields 2 output
// columns, matching the 2-element eltwise input per feature.
174 auto input_shape = tensor(1, n_features, 4, 1);
175 auto weights_shape = tensor(n_features, n_features, 3, 1);
176 auto biases_shape = tensor(1, 1, n_features, 1);
177 auto sum_input_shape = tensor(1, n_features, 2, 1);
179 auto input = memory::allocate(
181 {type_to_data_type<InputTy>::value, format::bfyx, input_shape});
182 auto weights = memory::allocate(
184 {type_to_data_type<WeightsTy>::value, format::bfyx, weights_shape});
186 auto biases = memory::allocate(
188 {type_to_data_type<BiasesTy>::value, format::bfyx, biases_shape});
189 auto input_quant_factors = memory::allocate(
190 engine, {data_types::f32, format::bfyx, biases_shape});
191 auto conv_output_calibration = memory::allocate(
192 engine, {data_types::f32, format::bfyx, biases_shape});
193 auto sum_input = memory::allocate(
195 {type_to_data_type<InputTy>::value, format::bfyx, sum_input_shape});
196 auto eltw_output_calibration = memory::allocate(
197 engine, {data_types::f32, format::bfyx, biases_shape});
199 set_values(input, input_values);
// Expand the per-feature 1x3 weight rows into a full
// n_features x n_features weights tensor: each output feature convolves
// only its own input feature (cross-feature weights are zeroed), so the
// features stay independent and each matches its add_feature() data.
200 std::vector<WeightsTy> post_processed_weights_values(n_features
202 for (int output_feature = 0; output_feature < n_features; ++output_feature)
203 for (int input_feature = 0; input_feature < n_features;
205 for (int x = 0; x < 3; ++x)
208 output_feature * n_features * 3 + input_feature * 3 + x;
209 if (input_feature == output_feature)
210 post_processed_weights_values[idx] =
211 weights_values[input_feature * 3 + x];
213 post_processed_weights_values[idx] = 0;
215 set_values(weights, post_processed_weights_values);
216 set_values(biases, biases_values);
217 set_values(input_quant_factors, input_quant_factors_values);
218 set_values(conv_output_calibration, calibration_values);
219 set_values(sum_input, non_conv_input_values);
220 set_values(eltw_output_calibration, eltw_output_calibration_values);
222 the_topology.add(input_layout("input", input.get_layout()));
223 the_topology.add(data("weights", weights));
224 the_topology.add(data("biases", biases));
225 the_topology.add(data("sum_input", sum_input));
226 the_topology.add(data("input_quant_factors", input_quant_factors));
227 the_topology.add(data("conv_output_calibration", conv_output_calibration));
228 the_topology.add(data("eltw_output_calibration", eltw_output_calibration));
229 the_topology.add(fused_prim);
// Optimizations are disabled: the primitive under test is already fused.
232 opts.set_option(build_option::optimize_data(false));
234 network network(engine, the_topology, opts);
235 network.set_input_data("input", input);
237 auto outputs = network.execute();
// The fused primitive is expected to be named "fused_conv" by each test.
239 auto output_memory = outputs.at("fused_conv").get_memory();
240 auto output_layout = output_memory.get_layout();
241 auto output_ptr = output_memory.pointer<OutputTy>();
242 int y_size = output_layout.size.spatial[1];
243 int x_size = output_layout.size.spatial[0];
244 int f_size = output_layout.size.feature[0];
245 int b_size = output_layout.size.batch[0];
// Expected output: 1 x n_features x 2 x 1 in bfyx.
246 EXPECT_EQ(output_layout.format, format::bfyx);
247 EXPECT_EQ(y_size, 1);
248 EXPECT_EQ(x_size, 2);
249 EXPECT_EQ(f_size, n_features);
250 EXPECT_EQ(b_size, 1);
// Element-wise comparison: apply the activation/clamp to the stored
// pre-activation expectations and compare with the actual output.
252 for (int f = 0; f < f_size; f++)
253 for (int x = 0; x < x_size; ++x)
255 // printf("f: %d, x: %d\n", f, x);
256 OutputPreActivationTy expected =
257 pre_relu_to_output(output_pre_relu[f * x_size + x]);
258 auto actual = static_cast<OutputPreActivationTy>(
259 output_ptr[f * x_size + x]);
260 expect_eq(expected, actual);
// Float comparison: near-equality with a small absolute tolerance.
265 template<typename T = OutputPreActivationTy>
266 static typename std::enable_if<std::is_floating_point<T>::value>::type
267 expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
269 EXPECT_NEAR(lhs, rhs, 0.001f);
// Integer comparison: exact equality (body on a line not visible here).
272 template<typename T = OutputPreActivationTy>
273 static typename std::enable_if<std::is_integral<T>::value>::type
274 expect_eq(const OutputPreActivationTy& lhs, const OutputPreActivationTy& rhs)
// Models the primitive's final stage: ReLU (max with 0) followed by
// saturation to the numeric range of OutputTy.
279 template <typename T>
280 static T pre_relu_to_output(T pre_relu) {
281 // No std::clamp before C++17 :(
283 static_cast<T>(std::numeric_limits<OutputTy>::max()),
284 std::max(static_cast<T>(std::numeric_limits<OutputTy>::lowest()),
285 std::max(static_cast<T>(0), pre_relu)));
// All-float configuration: InputTy = OutputTy = float, so weights/biases are
// float too and quantization factors act as plain multipliers.
289 class FusedConvTest_all_float : public FusedConvTest<float, float>
// Two independent features; the second uses a different eltwise output
// calibration (2.0) and slightly different eltwise inputs to verify the
// per-feature calibration path.
292 TEST_F(FusedConvTest_all_float, basic) {
293 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
294 {2.0f, 0.0f, 1.0f}, // weights
296 1.0f, // conv_input_quant
297 1.0f, // conv_output_calibration
298 {-10.0f, -10.0f}, // non_conv_input
299 1.0f, // eltw_output_calibration
300 {241.0f, 242.0f}); // output_pre_relu
302 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
303 {2.0f, 0.0f, 1.0f}, // weights
305 1.0f, // conv_input_quant
306 1.0f, // conv_output_calibration
307 {-10.0f, -11.0f}, // non_conv_input
308 2.0f, // eltw_output_calibration
309 {480.0f, 480.0f}); // output_pre_relu
// Build the fused primitive with all quantization/calibration inputs wired
// in and unit strides/dilation; eltw activation (ReLU) enabled.
311 do_test(fused_conv_eltwise("fused_conv",
317 {"input_quant_factors"},
318 {"conv_output_calibration"},
319 1.0f, // conv_i_quantization_factor
320 1.0f, // non_conv_scale
321 "eltw_output_calibration",
322 {{1, 1, 1, 1}}, // eltw_stride
323 {1, 1, 1, 1}, // stride
324 {0, 0, 0, 0}, // input_offset
325 {1, 1, 1, 1}, // dilation
326 false, // conv_with_activation
327 0.0f, // conv_activation_slp
328 true, // eltw_activation
329 0.0f)); // eltw_activation_slp
// Float configuration without per-channel convolution output calibration:
// the primitive receives an empty calibration list.
332 class FusedConvTest_no_conv_calibration : public FusedConvTest<float, float>
335 TEST_F(FusedConvTest_no_conv_calibration, basic) {
336 // That might happen if both conv output and non-conv input happen to be
337 // normalized to the same dynamic range or if tensor-wise (instead of
338 // per-channel) calibration is used. Also, a similar thing might happen for
339 // a convolution with calibration without quantization (which is the real
340 // target of this test, needed for the Inference Engine).
342 // add_feature contains data for conv quantization/calibration, but the
343 // primitive won't use it. It's just much easier to unify different tests
// Same feature data as the all-float test, but conv_output_calibration is
// deliberately `ignore` (NaN) since the primitive never reads it here.
345 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
346 {2.0f, 0.0f, 1.0f}, // weights
348 1.0f, // conv_input_quant
349 ignore, // conv_output_calibration
350 {-10.0f, -10.0f}, // non_conv_input
351 1.0f, // eltw_output_calibration
352 {241.0f, 242.0f}); // output_pre_relu
354 add_feature({125.0f, 125.0f, 0.0f, 1.0f}, // input
355 {2.0f, 0.0f, 1.0f}, // weights
357 1.0f, // conv_input_quant
358 ignore, // conv_output_calibration
359 {-10.0f, -11.0f}, // non_conv_input
360 2.0f, // eltw_output_calibration
361 {480.0f, 480.0f}); // output_pre_relu
// Identical primitive to the all-float test, except the conv output
// calibration input list is empty.
363 do_test(fused_conv_eltwise("fused_conv",
369 {"input_quant_factors"},
370 {}, // conv_output_calibration
371 1.0f, // conv_i_quantization_factor
372 1.0f, // non_conv_scale
373 "eltw_output_calibration",
374 {{1, 1, 1, 1}}, // eltw_stride
375 {1, 1, 1, 1}, // stride
376 {0, 0, 0, 0}, // input_offset
377 {1, 1, 1, 1}, // dilation
378 false, // conv_with_activation
379 0.0f, // conv_activation_slp
380 true, // eltw_activation
381 0.0f)); // eltw_activation_slp
// int8 -> int8 configuration exercising the primitive-wide (not per-channel)
// non_conv_scale factor applied to the non-convolution eltwise operand.
384 class FusedConvTest_non_conv_scale_per_primitive : public FusedConvTest<int8_t, int8_t>
387 TEST_F(FusedConvTest_non_conv_scale_per_primitive, basic) {
388 // NOTE: The data in add_feature calls implicitly assumes this!
389 const float non_conv_scale = 2.0f; // TODO: Need per-channel too?
391 // Check that the output precision is `u8` indeed. If it was not, then 251
392 // would either be rounded to 250 or 252. Ensure it's not the case and the
393 // outputs actually differ.
// Compact positional form of add_feature:
// (input, weights, bias, conv_input_quant, conv_output_calibration,
//  non_conv_input, eltw_output_calibration, output_pre_relu).
394 add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {231, 232});
395 add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, 1.0f, ignore, {-10, -10}, 1.0f, {230, 231});
397 // Verify that activation is done before the final calibration+type
398 // conversion (in other words, in higher precision than the output).
399 add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-10, -10}, 1.0f, {181, -219});
400 add_feature({0, 50, 0, -50}, {0, 4, 4}, 1, 1.0f, ignore, {-5, -5}, 1.0f, {191, -209});
402 // Same but with non-unit calibration (just in case).
403 add_feature({0, 50, 0, -50}, {0, 8, 8}, 2, 1.0f, ignore, {10, 10}, 0.5f, {211, -189});
// non_conv_scale = 2.0 is the parameter under test; no per-channel conv
// output calibration (empty list).
405 do_test(fused_conv_eltwise("fused_conv",
411 {"input_quant_factors"},
412 {}, // conv_output_calibration
413 1.0f, // conv_i_quantization_factor
414 non_conv_scale, // non_conv_scale
415 "eltw_output_calibration",
416 {{1, 1, 1, 1}}, // eltw_stride
417 {1, 1, 1, 1}, // stride
418 {0, 0, 0, 0}, // input_offset
419 {1, 1, 1, 1}, // dilation
420 false, // conv_with_activation
421 0.0f, // conv_activation_slp
422 true, // eltw_activation
423 0.0f)); // eltw_activation_slp
// int8 input / uint8 output configuration: exercises the explicit output
// data type override (optional_data_type{data_types::u8}) with quantized
// eltwise output calibration but no conv quantization/calibration inputs.
426 class FusedConvTest_i8_to_u8_quantized : public FusedConvTest<int8_t, uint8_t>
429 TEST_F(FusedConvTest_i8_to_u8_quantized, basic) {
// Positional add_feature data; conv quant/calibration factors are `ignore`
// (NaN) because the primitive below receives empty lists for them.
430 add_feature({125, 125, 0, 1}, {2, 0, 1}, 1, ignore, ignore, {-10, -10}, 1, {241, 242});
431 add_feature({125, 125, 0, 1}, {2, 0, 1}, 0, ignore, ignore, {-10, -11}, 2, {480, 480});
// Note the explicitly spelled tensor types and the u8 output override as
// the final constructor argument.
433 do_test(fused_conv_eltwise("fused_conv",
439 {}, // input_quant_factors
440 {}, // conv_output_calibration
441 1.0f, // conv_i_quantization_factor
442 1.0f, // non_conv_scale
443 "eltw_output_calibration",
444 std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
445 tensor{1, 1, 1, 1}, // stride
446 tensor{0, 0, 0, 0}, // input_offset
447 tensor{1, 1, 1, 1}, // dilation
448 false, // conv_with_activation
449 0.0f, // conv_activation_slp
450 true, // eltw_activation
451 0.0f, // eltw_activation_slp
453 optional_data_type{data_types::u8}));
// int8 -> uint8 configuration WITHOUT eltwise output calibration: the
// primitive gets an empty eltw calibration list, while input quantization
// factors are still provided.
456 class FusedConvTest_i8_to_u8_no_eltw_calibration
457 : public FusedConvTest<int8_t, uint8_t>
460 TEST_F(FusedConvTest_i8_to_u8_no_eltw_calibration, basic) {
// NOTE(review): this local is presumably passed as the non_conv_scale
// constructor argument on a line not visible in this chunk — confirm
// against the full file.
461 const float non_conv_scale = 1.0f / 3.0f;
463 add_feature({124, 124, 0, -4}, // input
464 {2, 0, 1}, // weights
466 0.5f, // conv_input_quant
467 ignore, // conv_output_calibration
468 {-60, -60}, // non_conv_input
469 ignore, // eltw_output_calibration
470 {252 / 2 - 20, 248 / 2 - 20}); // output_pre_relu
472 add_feature({3, 3, 1, 1}, // input
473 {2, 0, 1}, // weights
475 1.0f / 3.0f, // conv_input_quant
476 ignore, // conv_output_calibration
477 {1, 1}, // eltw_sum_input
478 ignore, // eltw_output_calibration
479 // TODO: Do we really need that round? Should it be "3" instead?
480 // { round(2.333) + round (0.333) }
481 {2, 2}); // output_pre_relu
// Same primitive shape as the quantized i8->u8 test, but with an empty
// eltw_output_calibration list; output type forced to u8.
483 do_test(fused_conv_eltwise("fused_conv",
489 {"input_quant_factors"},
490 {}, // conv_output_calibration
491 1.0f, // conv_i_quantization_factor
493 {}, // eltw_output_calibration
494 std::vector<tensor>{tensor{1, 1, 1, 1}}, // eltw_stride
495 tensor{1, 1, 1, 1}, // stride
496 tensor{0, 0, 0, 0}, // input_offset
497 tensor{1, 1, 1, 1}, // dilation
498 false, // conv_with_activation
499 0.0f, // conv_activation_slp
500 true, // eltw_activation
501 0.0f, // eltw_activation_slp
503 optional_data_type{data_types::u8}));