inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2017 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include <gtest/gtest.h>
  19 #include "api/CPP/memory.hpp"
  20 #include <api/CPP/input_layout.hpp>
  21 #include "api/CPP/scale.hpp"
  22 #include <api/CPP/topology.hpp>
  23 #include <api/CPP/network.hpp>
  24 #include <api/CPP/engine.hpp>
  25 #include "test_utils/test_utils.h"
  26 #include "api/CPP/reorder.hpp"
  27
  28 #include <iostream>
  29
  30 using namespace cldnn;
  31 using namespace tests;
  32
  33 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size) {
  34     //  Scale  : 2x3x2x2
  35     //  Input  : 2x3x2x2
  36     //  Output : 2x3x2x2
  37
  38     //  Input:
  39     //  f0: b0:  1    2  -10   b1:   0    0    -11
  40     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
  41     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
  42     //  f1: b0:  7    8  -16   b1:   12   8    -17
  43     //
  44     //  Scale:
  45     //  f0: b0:  0.1    0.2  0.25   b1:   0.3   0.4   0.5
  46     //  f0: b0:  0.6    0.7  0.75   b1:   0.8   0.9   1
  47     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5
  48     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
  49
  50     const auto& engine = get_test_engine();
  51
  52     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
  53     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
  54
  55     topology topology;
  56     topology.add(input_layout("input", input.get_layout()));
  57     topology.add(input_layout("scale_input", scale_input.get_layout()));
  58     topology.add(scale("scale", "input", "scale_input"));
  59
  60     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
  61         2.f, 0.f, 6.f, 5.2f,
  62         -10.f, -11.f, -12.f, -13.f,
  63         3.f, 0.5f, 7.f, 12.f,
  64         4.f, -0.5f, 8.f, 8.f,
  65         -14.f, -15.f, -16.f, -17.f };
  66     set_values(input, input_vec);
  67
  68     std::vector<float> scale_input_vec = {
  69         0.1f, 0.3f, 1.1f, 1.3f,
  70         0.2f, 0.4f, 1.2f, 1.4f,
  71         0.25f, 0.5f, 1.25f, 1.5f,
  72         0.6f, 0.8f, 1.6f, 1.8f,
  73         0.7f, 0.9f, 1.7f, 1.9f,
  74         0.75f, 1.f, 1.75f, 2.f
  75     };
  76     set_values(scale_input, scale_input_vec);
  77
  78     network network(engine, topology);
  79
  80     network.set_input_data("input", input);
  81     network.set_input_data("scale_input", scale_input);
  82
  83     auto outputs = network.execute();
  84
  85     auto output = outputs.at("scale").get_memory();
  86     auto output_ptr = output.pointer<float>();
  87
  88     for (unsigned int i = 0; i < input_vec.size(); ++i) {
  89         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F);
  90     }
  91 }
  92
  93 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bfyx) {
  94     //  Scale  : 2x3x2x2
  95     //  Input  : 2x3x2x2
  96     //  Output : 2x3x2x2
  97
  98     //  Input:
  99     //  f0: b0:  1    2  -10   b1:   0    0    -11
 100     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 101     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 102     //  f1: b0:  7    8  -16   b1:   12   8    -17
 103     //
 104     //  Scale:
 105     //  f0: b0:  0.1    0.2  0.25   b1:   0.3   0.4   0.5
 106     //  f0: b0:  0.6    0.7  0.75   b1:   0.8   0.9   1
 107     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5
 108     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 109
 110     const auto& engine = get_test_engine();
 111
 112     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
 113     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
 114
 115     topology topology;
 116     topology.add(input_layout("input", input.get_layout()));
 117     topology.add(input_layout("scale_input", scale_input.get_layout()));
 118     topology.add(scale("scale", "input", "scale_input"));
 119
 120     std::vector<float> input_vec = {
 121         1.f, 2.f, -10.f, 0.f, 0.f, -11.f,
 122         3.f, 4.f, -14.f, 0.5f, -0.5f, -15.f,
 123         5.f, 6.f, -12.f, 1.5f, 5.2f, -13.f,
 124         7.f, 8.f, -16.f, 12.f, 8.f, -17.f
 125     };
 126     set_values(input, input_vec);
 127
 128     std::vector<float> scale_input_vec = {
 129         0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f,
 130         0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f,
 131         1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f,
 132         1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f
 133     };
 134     set_values(scale_input, scale_input_vec);
 135
 136     network network(engine, topology);
 137
 138     network.set_input_data("input", input);
 139     network.set_input_data("scale_input", scale_input);
 140
 141     auto outputs = network.execute();
 142
 143     auto output = outputs.at("scale").get_memory();
 144     auto output_ptr = output.pointer<float>();
 145
 146     for (unsigned int i = 0; i < input_vec.size(); ++i) {
 147         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F);
 148     }
 149 }
 150
 151 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_scale_bfyx) {
 152     //  Scale  : 2x3x2x2
 153     //  Input  : 2x3x2x2
 154     //  Output : 2x3x2x2
 155
 156     //  Input:
 157     //  f0: b0:  1    2  -10   b1:   0    0    -11
 158     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 159     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 160     //  f1: b0:  7    8  -16   b1:   12   8    -17
 161     //
 162     //  Scale:
 163     //  f0: b0:  0.1    0.2  0.25   b1:   0.3   0.4   0.5
 164     //  f0: b0:  0.6    0.7  0.75   b1:   0.8   0.9   1
 165     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5
 166     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 167
 168     const auto& engine = get_test_engine();
 169
 170     auto batch_num = 2;
 171     auto feature_num = 2;
 172     auto x_size = 3;
 173     auto y_size = 2;
 174
 175     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 176     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
 177
 178     topology topology;
 179     topology.add(input_layout("input", input.get_layout()));
 180     topology.add(input_layout("scale_input", scale_input.get_layout()));
 181     topology.add(scale("scale", "input", "scale_input"));
 182
 183     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 184         2.f, 0.f, 6.f, 5.2f,
 185         -10.f, -11.f, -12.f, -13.f,
 186         3.f, 0.5f, 7.f, 12.f,
 187         4.f, -0.5f, 8.f, 8.f,
 188         -14.f, -15.f, -16.f, -17.f };
 189     set_values(input, input_vec);
 190
 191     std::vector<float> scale_input_vec = {
 192         0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f,
 193         0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f,
 194         1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f,
 195         1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f
 196     };
 197     set_values(scale_input, scale_input_vec);
 198
 199     network network(engine, topology);
 200
 201     network.set_input_data("input", input);
 202     network.set_input_data("scale_input", scale_input);
 203
 204     auto outputs = network.execute();
 205
 206     auto output = outputs.at("scale").get_memory();
 207     auto output_ptr = output.pointer<float>();
 208
 209     for (int j = 0; j < feature_num; ++j) { //F
 210         for (int i = 0; i < batch_num; ++i) { //B
 211             for (int k = 0; k < y_size; ++k) { //Y
 212                 for (int l = 0; l < x_size; ++l) { //X
 213                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 214                     int linear_id_scale = l + x_size * (k + y_size * (j + i * feature_num));
 215                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 216                 }
 217             }
 218         }
 219     }
 220 }
 221
 222 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bias_term) {
 223     //  Scale  : 2x3x2x2
 224     //  Bias   : 2x3x2x2
 225     //  Input  : 2x3x2x2
 226     //  Output : 2x3x2x2
 227
 228     //  Input:
 229     //  f0: b0:  1    2  -10   b1:   0    0    -11
 230     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 231     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 232     //  f1: b0:  7    8  -16   b1:   12   8    -17
 233     //
 234     //  Scale:
 235     //  f0: b0:  0.1    0.2  0.25   b1:   0.3   0.4   0.5
 236     //  f0: b0:  0.6    0.7  0.75   b1:   0.8   0.9   1
 237     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5
 238     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 239     //
 240     //  Bias:
 241     //  f0: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5
 242     //  f0: b0:  2.6    2.7  2.75   b1:   2.8   2.9   2
 243     //  f1: b0:  3.1    3.2  3.25   b1:   3.3   3.4   3.5
 244     //  f1: b0:  4.6    4.7  4.75   b1:   4.8   4.9   4
 245
 246     const auto& engine = get_test_engine();
 247
 248     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
 249     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
 250     auto bias = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
 251
 252     topology topology;
 253     topology.add(input_layout("input", input.get_layout()));
 254     topology.add(input_layout("scale_input", scale_input.get_layout()));
 255     topology.add(input_layout("bias", bias.get_layout()));
 256     topology.add(scale("scale", "input", "scale_input", "bias"));
 257
 258     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 259         2.f, 0.f, 6.f, 5.2f,
 260         -10.f, -11.f, -12.f, -13.f,
 261         3.f, 0.5f, 7.f, 12.f,
 262         4.f, -0.5f, 8.f, 8.f,
 263         -14.f, -15.f, -16.f, -17.f };
 264     set_values(input, input_vec);
 265
 266     std::vector<float> scale_input_vec = {
 267         0.1f, 0.3f, 1.1f, 1.3f,
 268         0.2f, 0.4f, 1.2f, 1.4f,
 269         0.25f, 0.5f, 1.25f, 1.5f,
 270         0.6f, 0.8f, 1.6f, 1.8f,
 271         0.7f, 0.9f, 1.7f, 1.9f,
 272         0.75f, 1.f, 1.75f, 2.f
 273     };
 274     set_values(scale_input, scale_input_vec);
 275
 276     std::vector<float> bias_vec = {
 277         1.1f, 2.3f, 3.1f, 4.3f,
 278         1.2f, 2.4f, 3.2f, 4.4f,
 279         1.25f, 2.5f, 3.25f, 4.5f,
 280         1.6f, 2.8f, 3.6f, 4.8f,
 281         1.7f, 2.9f, 3.7f, 4.9f,
 282         1.75f, 2.f, 3.75f, 4.f
 283     };
 284     set_values(bias, bias_vec);
 285
 286     network network(engine, topology);
 287
 288     network.set_input_data("input", input);
 289     network.set_input_data("scale_input", scale_input);
 290     network.set_input_data("bias", bias);
 291
 292     auto outputs = network.execute();
 293
 294     auto output = outputs.at("scale").get_memory();
 295     auto output_ptr = output.pointer<float>();
 296
 297     for (unsigned int i = 0; i < input_vec.size(); ++i) {
 298         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i] + bias_vec[i], 1e-05F);
 299     }
 300 }
 301
 302 TEST(scale_gpu, basic_in2x3x2x2_scale_scalar) {
 303     //  Scale  : 1
 304     //  Input  : 2x3x2x2
 305     //  Output : 2x3x2x2
 306
 307     //  Input:
 308     //  f0: b0:  1    2  -10   b1:   0    0    -11
 309     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 310     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 311     //  f1: b0:  7    8  -16   b1:   12   8    -17
 312     //
 313     //  Scale:
 314     //  0.1    0.2
 315
 316     const auto& engine = get_test_engine();
 317
 318     auto batch_num = 2;
 319     auto feature_num = 2;
 320     auto x_size = 3;
 321     auto y_size = 2;
 322
 323     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 324     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
 325
 326     topology topology;
 327     topology.add(input_layout("input", input.get_layout()));
 328     topology.add(input_layout("scale_input", scale_input.get_layout()));
 329     topology.add(scale("scale", "input", "scale_input"));
 330
 331     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 332         2.f, 0.f, 6.f, 5.2f,
 333         -10.f, -11.f, -12.f, -13.f,
 334         3.f, 0.5f, 7.f, 12.f,
 335         4.f, -0.5f, 8.f, 8.f,
 336         -14.f, -15.f, -16.f, -17.f };
 337     set_values(input, input_vec);
 338
 339     std::vector<float> scale_input_vec = {
 340         0.1f,
 341     };
 342     set_values(scale_input, scale_input_vec);
 343
 344     network network(engine, topology);
 345
 346     network.set_input_data("input", input);
 347     network.set_input_data("scale_input", scale_input);
 348
 349     auto outputs = network.execute();
 350
 351     auto output = outputs.at("scale").get_memory();
 352     auto output_ptr = output.pointer<float>();
 353
 354     for (int j = 0; j < feature_num; ++j) { //F
 355         for (int i = 0; i < batch_num; ++i) { //B
 356             for (int k = 0; k < y_size; ++k) { //Y
 357                 for (int l = 0; l < x_size; ++l) { //X
 358                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 359                     int linear_id_scale = 0;
 360                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 361                 }
 362             }
 363         }
 364     }
 365 }
 366
 367 TEST(scale_gpu, basic_in2x3x2x2_scale_y) {
 368     //  Scale  : 2
 369     //  Input  : 2x3x2x2
 370     //  Output : 2x3x2x2
 371
 372     //  Input:
 373     //  f0: b0:  1    2  -10   b1:   0    0    -11
 374     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 375     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 376     //  f1: b0:  7    8  -16   b1:   12   8    -17
 377     //
 378     //  Scale:
 379     //  0.1    0.2
 380
 381     const auto& engine = get_test_engine();
 382
 383     auto batch_num = 2;
 384     auto feature_num = 2;
 385     auto x_size = 3;
 386     auto y_size = 2;
 387
 388     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 389     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,1,1,y_size } });
 390
 391     topology topology;
 392     topology.add(input_layout("input", input.get_layout()));
 393     topology.add(input_layout("scale_input", scale_input.get_layout()));
 394     topology.add(scale("scale", "input", "scale_input"));
 395
 396     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 397         2.f, 0.f, 6.f, 5.2f,
 398         -10.f, -11.f, -12.f, -13.f,
 399         3.f, 0.5f, 7.f, 12.f,
 400         4.f, -0.5f, 8.f, 8.f,
 401         -14.f, -15.f, -16.f, -17.f };
 402     set_values(input, input_vec);
 403
 404     std::vector<float> scale_input_vec = {
 405         0.1f,
 406         0.2f,
 407     };
 408     set_values(scale_input, scale_input_vec);
 409
 410     network network(engine, topology);
 411
 412     network.set_input_data("input", input);
 413     network.set_input_data("scale_input", scale_input);
 414
 415     auto outputs = network.execute();
 416
 417     auto output = outputs.at("scale").get_memory();
 418     auto output_ptr = output.pointer<float>();
 419
 420     for (int j = 0; j < feature_num; ++j) { //F
 421         for (int i = 0; i < batch_num; ++i) { //B
 422             for (int k = 0; k < y_size; ++k) { //Y
 423                 for (int l = 0; l < x_size; ++l) { //X
 424                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 425                     int linear_id_scale = k;
 426                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 427                 }
 428             }
 429         }
 430     }
 431 }
 432
 433 TEST(scale_gpu, basic_in2x3x2x2_scale_fb) {
 434     //  Scale  : 2x3x2x2
 435     //  Input  : 2x3x2x2
 436     //  Output : 2x3x2x2
 437
 438     //  Input:
 439     //  f0: b0:  1    2  -10   b1:   0    0    -11
 440     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 441     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 442     //  f1: b0:  7    8  -16   b1:   12   8    -17
 443     //
 444     //  Scale: per feature per batch
 445     //  f0b0: 0.1   f0b1: 0.2
 446     //  f1b0: 0.5   f1b1: 2.0
 447
 448     const auto& engine = get_test_engine();
 449
 450     auto batch_num = 2;
 451     auto feature_num = 2;
 452     auto x_size = 3;
 453     auto y_size = 2;
 454
 455     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 456     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, 1, 1 } });
 457
 458     topology topology;
 459     topology.add(input_layout("input", input.get_layout()));
 460     topology.add(input_layout("scale_input", scale_input.get_layout()));
 461     topology.add(scale("scale", "input", "scale_input"));
 462
 463     std::vector<float> input_vec = {
 464         1.f, 0.f, 5.f, 1.5f,
 465         2.f, 0.f, 6.f, 5.2f,
 466         -10.f, -11.f, -12.f, -13.f,
 467         3.f, 0.5f, 7.f, 12.f,
 468         4.f, -0.5f, 8.f, 8.f,
 469         -14.f, -15.f, -16.f, -17.f };
 470     set_values(input, input_vec);
 471
 472     std::vector<float> scale_input_vec = {
 473         0.1f, 0.2f, 0.5f, 2.0f,
 474     };
 475     set_values(scale_input, scale_input_vec);
 476
 477     network network(engine, topology);
 478
 479     network.set_input_data("input", input);
 480     network.set_input_data("scale_input", scale_input);
 481
 482     auto outputs = network.execute();
 483
 484     auto output = outputs.at("scale").get_memory();
 485     auto output_ptr = output.pointer<float>();
 486
 487     for (int j = 0; j < feature_num; ++j) { //F
 488         for (int i = 0; i < batch_num; ++i) { //B
 489             for (int k = 0; k < y_size; ++k) { //Y
 490                 for (int l = 0; l < x_size; ++l) { //X
 491                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 492                     int linear_id_scale = i + feature_num * j;
 493                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 494                 }
 495             }
 496         }
 497     }
 498 }
 499
 500 TEST(scale_gpu, basic_in2x3x2x2_scale_f) {
 501     //  Scale  : 2x3x2x2
 502     //  Input  : 2x3x2x2
 503     //  Output : 2x3x2x2
 504
 505     //  Input:
 506     //  f0: b0:  1    2  -10   b1:   0    0    -11
 507     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 508     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 509     //  f1: b0:  7    8  -16   b1:   12   8    -17
 510     //
 511     //  Scale: per feature
 512     //  f0bx: 0.1   f1bx: 0.2
 513
 514     const auto& engine = get_test_engine();
 515
 516     auto batch_num = 2;
 517     auto feature_num = 2;
 518     auto x_size = 3;
 519     auto y_size = 2;
 520
 521     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 522     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, 1, 1 } });
 523
 524     topology topology;
 525     topology.add(input_layout("input", input.get_layout()));
 526     topology.add(input_layout("scale_input", scale_input.get_layout()));
 527     topology.add(scale("scale", "input", "scale_input"));
 528
 529     std::vector<float> input_vec = {
 530         1.f, 0.f, 5.f, 1.5f,
 531         2.f, 0.f, 6.f, 5.2f,
 532         -10.f, -11.f, -12.f, -13.f,
 533         3.f, 0.5f, 7.f, 12.f,
 534         4.f, -0.5f, 8.f, 8.f,
 535         -14.f, -15.f, -16.f, -17.f };
 536     set_values(input, input_vec);
 537
 538     std::vector<float> scale_input_vec = {
 539         //f0bx  //f1bx
 540         0.1f,   0.2f
 541     };
 542     set_values(scale_input, scale_input_vec);
 543
 544     network network(engine, topology);
 545
 546     network.set_input_data("input", input);
 547     network.set_input_data("scale_input", scale_input);
 548
 549     auto outputs = network.execute();
 550
 551     auto output = outputs.at("scale").get_memory();
 552     auto output_ptr = output.pointer<float>();
 553
 554     for (int j = 0; j < feature_num; ++j) { //F
 555         for (int i = 0; i < batch_num; ++i) { //B
 556             for (int k = 0; k < y_size; ++k) { //Y
 557                 for (int l = 0; l < x_size; ++l) { //X
 558                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 559                     int linear_id_scale = j;
 560                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 561                 }
 562             }
 563         }
 564     }
 565 }
 566
 567 TEST(scale_gpu, basic_in2x3x2x2_scale_x) {
 568     //  Scale  : 3
 569     //  Input  : 2x3x2x2
 570     //  Output : 2x3x2x2
 571
 572     //  Input:
 573     //  f0: b0:  1    2  -10   b1:   0    0    -11
 574     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 575     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 576     //  f1: b0:  7    8  -16   b1:   12   8    -17
 577     //
 578     //  Scale:
 579     //  0.1    0.2  0.25
 580
 581     const auto& engine = get_test_engine();
 582
 583     auto batch_num = 2;
 584     auto feature_num = 2;
 585     auto x_size = 3;
 586     auto y_size = 2;
 587
 588     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 589     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, 1 } });
 590
 591     topology topology;
 592     topology.add(input_layout("input", input.get_layout()));
 593     topology.add(input_layout("scale_input", scale_input.get_layout()));
 594     topology.add(scale("scale", "input", "scale_input"));
 595
 596     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 597         2.f, 0.f, 6.f, 5.2f,
 598         -10.f, -11.f, -12.f, -13.f,
 599         3.f, 0.5f, 7.f, 12.f,
 600         4.f, -0.5f, 8.f, 8.f,
 601         -14.f, -15.f, -16.f, -17.f };
 602     set_values(input, input_vec);
 603
 604     std::vector<float> scale_input_vec = {
 605         0.1f,
 606         0.2f,
 607         0.25f
 608     };
 609     set_values(scale_input, scale_input_vec);
 610
 611     network network(engine, topology);
 612
 613     network.set_input_data("input", input);
 614     network.set_input_data("scale_input", scale_input);
 615
 616     auto outputs = network.execute();
 617
 618     auto output = outputs.at("scale").get_memory();
 619     auto output_ptr = output.pointer<float>();
 620
 621     for (int j = 0; j < feature_num; ++j) { //F
 622         for (int i = 0; i < batch_num; ++i) { //B
 623             for (int k = 0; k < y_size; ++k) { //Y
 624                 for (int l = 0; l < x_size; ++l) { //X
 625                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 626                     int linear_id_scale = l;
 627                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 628                 }
 629             }
 630         }
 631     }
 632 }
 633
 634 TEST(scale_gpu, basic_in2x3x2x2_scale_xy) {
 635     //  Scale  : 2x3x1x1
 636     //  Input  : 2x3x2x2
 637     //  Output : 2x3x2x2
 638
 639     //  Input:
 640     //  f0: b0:  1    2  -10   b1:   0    0    -11
 641     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 642     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 643     //  f1: b0:  7    8  -16   b1:   12   8    -17
 644     //
 645     //  Scale:
 646     //  f0:  0.1    0.2  0.25
 647     //  f0:  0.6    0.7  0.75
 648
 649     const auto& engine = get_test_engine();
 650
 651     auto batch_num = 2;
 652     auto feature_num = 2;
 653     auto x_size = 3;
 654     auto y_size = 2;
 655
 656     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 657     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, y_size } });
 658
 659     topology topology;
 660     topology.add(input_layout("input", input.get_layout()));
 661     topology.add(input_layout("scale_input", scale_input.get_layout()));
 662     topology.add(scale("scale", "input", "scale_input"));
 663
 664     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 665         2.f, 0.f, 6.f, 5.2f,
 666         -10.f, -11.f, -12.f, -13.f,
 667         3.f, 0.5f, 7.f, 12.f,
 668         4.f, -0.5f, 8.f, 8.f,
 669         -14.f, -15.f, -16.f, -17.f };
 670     set_values(input, input_vec);
 671
 672     std::vector<float> scale_input_vec = {
 673         0.1f,
 674         0.2f,
 675         0.25f,
 676         0.6f,
 677         0.7f,
 678         0.75f
 679     };
 680     set_values(scale_input, scale_input_vec);
 681
 682     network network(engine, topology);
 683
 684     network.set_input_data("input", input);
 685     network.set_input_data("scale_input", scale_input);
 686
 687     auto outputs = network.execute();
 688
 689     auto output = outputs.at("scale").get_memory();
 690     auto output_ptr = output.pointer<float>();
 691
 692     for (int j = 0; j < feature_num; ++j) { //F
 693         for (int i = 0; i < batch_num; ++i) { //B
 694             for (int k = 0; k < y_size; ++k) { //Y
 695                 for (int l = 0; l < x_size; ++l) { //X
 696                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 697                     int linear_id_scale = l + x_size * k;
 698                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 699                 }
 700             }
 701         }
 702     }
 703 }
 704
 705 TEST(scale_gpu, basic_in2x3x2x2_scale_batch1) {
 706     //  Scale  : 2x3x2x1
 707     //  Input  : 2x3x2x2
 708     //  Output : 2x3x2x2
 709
 710     //  Input:
 711     //  f0: b0:  1    2  -10   b1:   0    0    -11
 712     //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
 713     //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
 714     //  f1: b0:  7    8  -16   b1:   12   8    -17
 715     //
 716     //  Scale:
 717     //  f0: b0:  0.1    0.2  0.25
 718     //  f0: b0:  0.6    0.7  0.75
 719     //  f1: b0:  1.1    1.2  1.25
 720     //  f1: b0:  1.6    1.7  1.75
 721
 722     const auto& engine = get_test_engine();
 723
 724     auto batch_num = 2;
 725     auto feature_num = 2;
 726     auto x_size = 3;
 727     auto y_size = 2;
 728
 729     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
 730     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, x_size, y_size } });
 731
 732     topology topology;
 733     topology.add(input_layout("input", input.get_layout()));
 734     topology.add(input_layout("scale_input", scale_input.get_layout()));
 735     topology.add(scale("scale", "input", "scale_input"));
 736
 737     std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
 738         2.f, 0.f, 6.f, 5.2f,
 739         -10.f, -11.f, -12.f, -13.f,
 740         3.f, 0.5f, 7.f, 12.f,
 741         4.f, -0.5f, 8.f, 8.f,
 742         -14.f, -15.f, -16.f, -17.f };
 743     set_values(input, input_vec);
 744
 745     std::vector<float> scale_input_vec = {
 746         0.1f, 1.1f,
 747         0.2f, 1.2f,
 748         0.25f, 1.25f,
 749         0.6f, 1.6f,
 750         0.7f, 1.7f,
 751         0.75f, 1.75f
 752     };
 753     set_values(scale_input, scale_input_vec);
 754
 755     network network(engine, topology);
 756
 757     network.set_input_data("input", input);
 758     network.set_input_data("scale_input", scale_input);
 759
 760     auto outputs = network.execute();
 761
 762     auto output = outputs.at("scale").get_memory();
 763     auto output_ptr = output.pointer<float>();
 764
 765     for (int j = 0; j < feature_num; ++j) { //F
 766         for (int i = 0; i < batch_num; ++i) { //B
 767             for (int k = 0; k < y_size; ++k) { //Y
 768                 for (int l = 0; l < x_size; ++l) { //X
 769                     int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
 770                     int linear_id_scale = j + feature_num * (l + x_size * k);
 771                     EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
 772                 }
 773             }
 774         }
 775     }
 776 }
 777
 778 TEST(scale_gpu, basic_in2x3_scale_same_size_bx) {
 779     //  Scale  : 2x3
 780     //  Bias   : 2x3
 781     //  Input  : 2x3
 782     //  Output : 2x3
 783
 784     //  Input:
 785     //  b0: 1  2  -0.75
 786     //  b1: 0 -1.5  -3
 787     //
 788     //  Scale:
 789     //  b0: 3.1  0.2  0.17
 790     //  b1:  10   -3     1
 791
 792     //  Bias:
 793     //  b0: -0.1 3.2  7
 794     //  b1: 0    1   -1
 795
 796     const auto& engine = get_test_engine();
 797
 798     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
 799     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
 800     auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
 801
 802     topology topology;
 803     topology.add(input_layout("input", input.get_layout()));
 804     topology.add(input_layout("scale_input", scale_input.get_layout()));
 805     topology.add(input_layout("bias_input", scale_input.get_layout()));
 806     topology.add(scale("scale", "input", "scale_input", "bias_input"));
 807
 808     std::vector<float> input_vec = {
 809         1.f, 2.f, -0.75f,
 810         0.f, -1.5f, -3.f,
 811     };
 812     set_values(input, input_vec);
 813
 814     std::vector<float> scale_vec = {
 815         3.1f, 0.2f, 0.17f,
 816         10.f, -3.f, 1.f,
 817     };
 818     set_values(scale_input, scale_vec);
 819
 820     std::vector<float> bias_vec = {
 821         -0.1f, 3.2f, 7.f,
 822         0.f, 1.f, -1.f,
 823     };
 824     set_values(bias_input, bias_vec);
 825
 826     network network(engine, topology);
 827
 828     network.set_input_data("input", input);
 829     network.set_input_data("scale_input", scale_input);
 830     network.set_input_data("bias_input", bias_input);
 831
 832     auto outputs = network.execute();
 833
 834     auto output = outputs.at("scale").get_memory();
 835     auto output_ptr = output.pointer<float>();
 836
 837     for (unsigned int i = 0; i < input_vec.size(); ++i) {
 838         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F);
 839     }
 840 }
 841
 842 TEST(scale_gpu, basic_in2x3_scale_same_size_xb) {
 843     //  Scale  : 2x3
 844     //  Bias   : 2x3
 845     //  Input  : 2x3
 846     //  Output : 2x3
 847
 848     //  Input:
 849     //  x0: 1     2  -0.75
 850     //  x1: 0  -1.5     -3
 851     //
 852     //  Scale:
 853     //  x0: 3.1   0.2  0.17
 854     //  x1: 10     -3     1
 855
 856     //  Bias:
 857     //  x0: -0.1  3.2   7
 858     //  x1: 0       1  -1
 859
 860     const auto& engine = get_test_engine();
 861
 862     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
 863     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
 864     auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
 865
 866     topology topology;
 867     topology.add(input_layout("input", input.get_layout()));
 868     topology.add(input_layout("scale_input", scale_input.get_layout()));
 869     topology.add(input_layout("bias_input", scale_input.get_layout()));
 870     topology.add(scale("scale", "input", "scale_input", "bias_input"));
 871
 872     std::vector<float> input_vec = {
 873         1.f, 2.f, -0.75f,
 874         0.f, -1.5f, -3.f,
 875     };
 876     set_values(input, input_vec);
 877
 878     std::vector<float> scale_vec = {
 879         3.1f, 0.2f, 0.17f,
 880         10.f, -3.f, 1.f,
 881     };
 882     set_values(scale_input, scale_vec);
 883
 884     std::vector<float> bias_vec = {
 885         -0.1f, 3.2f, 7.f,
 886         0.f, 1.f, -1.f,
 887     };
 888     set_values(bias_input, bias_vec);
 889
 890     network network(engine, topology);
 891
 892     network.set_input_data("input", input);
 893     network.set_input_data("scale_input", scale_input);
 894     network.set_input_data("bias_input", bias_input);
 895
 896     auto outputs = network.execute();
 897
 898     auto output = outputs.at("scale").get_memory();
 899     auto output_ptr = output.pointer<float>();
 900
 901     for (unsigned int i = 0; i < input_vec.size(); ++i) {
 902         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F);
 903     }
 904 }
 905
 906 TEST(scale_gpu, basic_in2x3_scale_single_value_bx) {
 907     //  Scale  : 1x1
 908     //  Bias   : 1x1
 909     //  Input  : 2x3
 910     //  Output : 2x3
 911
 912     //  Input:
 913     //  b0: 1    2 -0.75
 914     //  b1: 0 -1.5    -3
 915     //
 916     //  Scale:
 917     //  3.1
 918
 919     //  Bias:
 920     //  -0.1
 921
 922     const auto& engine = get_test_engine();
 923
 924     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
 925     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 926     auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
 927
 928     topology topology;
 929     topology.add(input_layout("input", input.get_layout()));
 930     topology.add(input_layout("scale_input", scale_input.get_layout()));
 931     topology.add(input_layout("bias_input", scale_input.get_layout()));
 932     topology.add(scale("scale", "input", "scale_input", "bias_input"));
 933
 934     std::vector<float> input_vec = {
 935         1.f, 2.f, -0.75f,
 936         0.f, -1.5f, -3.f,
 937     };
 938     set_values(input, input_vec);
 939
 940     std::vector<float> scale_vec = {
 941         3.1f,
 942     };
 943     set_values(scale_input, scale_vec);
 944
 945     std::vector<float> bias_vec = {
 946         -0.1f,
 947     };
 948     set_values(bias_input, bias_vec);
 949
 950     network network(engine, topology);
 951
 952     network.set_input_data("input", input);
 953     network.set_input_data("scale_input", scale_input);
 954     network.set_input_data("bias_input", bias_input);
 955
 956     auto outputs = network.execute();
 957
 958     auto output = outputs.at("scale").get_memory();
 959     auto output_ptr = output.pointer<float>();
 960
 961     for (unsigned int i = 0; i < input_vec.size(); ++i) {
 962         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F);
 963     }
 964 }
 965
 966 TEST(scale_gpu, basic_in2x3_scale_single_value_xb) {
 967     //  Scale  : 1x1
 968     //  Bias   : 1x1
 969     //  Input  : 2x3
 970     //  Output : 2x3
 971
 972     //  Input:
 973     //  x0: 1     2 -0.75
 974     //  x1: 0  -1.5    -3
 975     //
 976     //  Scale:
 977     //  3.1
 978
 979     //  Bias:
 980     //  -0.1
 981
 982     const auto& engine = get_test_engine();
 983
 984     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
 985     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
 986     auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
 987
 988     topology topology;
 989     topology.add(input_layout("input", input.get_layout()));
 990     topology.add(input_layout("scale_input", scale_input.get_layout()));
 991     topology.add(input_layout("bias_input", scale_input.get_layout()));
 992     topology.add(scale("scale", "input", "scale_input", "bias_input"));
 993
 994     std::vector<float> input_vec = {
 995         1.f, 2.f, -0.75f,
 996         0.f, -1.5f, -3.f,
 997     };
 998     set_values(input, input_vec);
 999
1000     std::vector<float> scale_vec = {
1001         3.1f,
1002     };
1003     set_values(scale_input, scale_vec);
1004
1005     std::vector<float> bias_vec = {
1006         -0.1f,
1007     };
1008     set_values(bias_input, bias_vec);
1009
1010     network network(engine, topology);
1011
1012     network.set_input_data("input", input);
1013     network.set_input_data("scale_input", scale_input);
1014     network.set_input_data("bias_input", bias_input);
1015
1016     auto outputs = network.execute();
1017
1018     auto output = outputs.at("scale").get_memory();
1019     auto output_ptr = output.pointer<float>();
1020
1021     for (unsigned int i = 0; i < input_vec.size(); ++i) {
1022         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F);
1023     }
1024 }
1025
1026 TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_bx) {
1027     //  Scale  : 2x3
1028     //  Input  : 2x3
1029     //  Output : 2x3
1030
1031     //  Input:
1032     //  b0: 1    2 -0.75
1033     //  b1: 0 -1.5    -3
1034     //
1035     //  Scale:
1036     //  b0: 3.1   0.2   0.17
1037     //  b1: 10     -3      1
1038
1039     const auto& engine = get_test_engine();
1040
1041     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
1042     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
1043
1044     topology topology;
1045     topology.add(input_layout("input", input.get_layout()));
1046     topology.add(input_layout("scale_input", scale_input.get_layout()));
1047     topology.add(scale("scale", "input", "scale_input"));
1048
1049     std::vector<float> input_vec = {
1050         1.f, 2.f, -0.75f,
1051         0.f, -1.5f, -3.f,
1052     };
1053     set_values(input, input_vec);
1054
1055     std::vector<float> scale_vec = {
1056         3.1f, 0.2f, 0.17f,
1057         10.f, -3.f, 1.f,
1058     };
1059     set_values(scale_input, scale_vec);
1060
1061     network network(engine, topology);
1062
1063     network.set_input_data("input", input);
1064     network.set_input_data("scale_input", scale_input);
1065
1066     auto outputs = network.execute();
1067
1068     auto output = outputs.at("scale").get_memory();
1069     auto output_ptr = output.pointer<float>();
1070
1071     for (unsigned int i = 0; i < input_vec.size(); ++i) {
1072         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F);
1073     }
1074 }
1075
1076 TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_xb) {
1077     //  Scale  : 2x3
1078     //  Input  : 2x3
1079     //  Output : 2x3
1080
1081     //  Input:
1082     //  x0: 1     2  -0.75
1083     //  x1: 0  -1.5     -3
1084     //
1085     //  Scale:
1086     //  x0: 3.1    0.2   0.17
1087     //  x1: 10      -3      1
1088
1089     const auto& engine = get_test_engine();
1090
1091     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
1092     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
1093
1094     topology topology;
1095     topology.add(input_layout("input", input.get_layout()));
1096     topology.add(input_layout("scale_input", scale_input.get_layout()));
1097     topology.add(scale("scale", "input", "scale_input"));
1098
1099     std::vector<float> input_vec = {
1100         1.f, 2.f, -0.75f,
1101         0.f, -1.5f, -3.f,
1102     };
1103     set_values(input, input_vec);
1104
1105     std::vector<float> scale_vec = {
1106         3.1f, 0.2f, 0.17f,
1107         10.f, -3.f, 1.f,
1108     };
1109     set_values(scale_input, scale_vec);
1110
1111     network network(engine, topology);
1112
1113     network.set_input_data("input", input);
1114     network.set_input_data("scale_input", scale_input);
1115
1116     auto outputs = network.execute();
1117
1118     auto output = outputs.at("scale").get_memory();
1119     auto output_ptr = output.pointer<float>();
1120
1121     for (unsigned int i = 0; i < input_vec.size(); ++i) {
1122         EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F);
1123     }
1124 }
1125
1126 TEST(scale_gpu, basic_in2x3x2x2_scale_yxfb_bfyx_same_size_padding) {
1127     //  Scale  : 2x2x1x1
1128     //  Input  : 2x2x1x1
1129     //  Output : 2x2x1x1
1130     //  Output Padding: 2x2
1131     //  Input Padding: 2x1 (with reorder)
1132
1133     //  Input:
1134     //  1    2
1135     //  3    4
1136
1137     //
1138     //  Scale:
1139     //  0.1    0.2
1140     //  0.6    0.5
1141
1142     const auto& engine = get_test_engine();
1143     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
1144
1145     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
1146     {
1147         std::cout << "Testing format: " << format::order(*it) << std::endl;
1148
1149         tensor input_tensor(1, 1, 2, 2);
1150
1151         auto input = memory::allocate(engine, { data_types::f32, *it, input_tensor });
1152         auto scale_input = memory::allocate(engine, { data_types::f32, *it, input_tensor });
1153
1154         topology topology;
1155         topology.add(input_layout("input", input.get_layout()));
1156         topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })));
1157         topology.add(input_layout("scale_input", scale_input.get_layout()));
1158         topology.add(scale("scale", "reorder", "scale_input", padding( { 0, 0, 2, 2 }, 0 )));
1159
1160         std::vector<float> input_vec = { 1.f, 2.f, 3.f, 4.f };
1161         set_values(input, input_vec);
1162
1163         std::vector<float> scale_input_vec = { 0.1f, 0.2f, 0.6f, 0.5f };
1164         set_values(scale_input, scale_input_vec);
1165
1166         std::vector<float> expected = {
1167             0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1168             0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1169             0.f, 0.f, 0.1f, 0.4f, 0.f, 0.f,
1170             0.f, 0.f, 1.8f, 2.0f, 0.f, 0.f,
1171             0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1172             0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1173         };
1174
1175         network network(engine, topology);
1176
1177         network.set_input_data("input", input);
1178         network.set_input_data("scale_input", scale_input);
1179
1180         auto outputs = network.execute();
1181
1182         auto output = outputs.at("scale").get_memory();
1183         auto output_ptr = output.pointer<float>();
1184
1185         for (unsigned int i = 0; i < expected.size(); ++i) {
1186             EXPECT_NEAR(output_ptr[i], expected[i], 1e-05F);
1187         }
1188     }
1189 }
1190 //////////////////////////////////////////////////////////////////////////////
1191 //                                                                          //
1192 //                      Exhaustive Negative Matrix tests                    //
1193 //                                                                          //
1194 //////////////////////////////////////////////////////////////////////////////
1195
//TODO: this should be done using TEST_P or some equivalent construct
1197 static network setup_scale_network(
1198     const data_types dt,
1199     const tensor input_tensor,
1200     const tensor scale_tensor,
1201     const tensor bias_tensor,
1202     const format f,
1203     const format of,
1204     bool pass_bias          //TODO: a WA for lack of std::optional<tensor> bias
1205 )
1206 {
1207     const auto& engine = get_test_engine();
1208     topology topology;
1209
1210     auto input_mem = memory::allocate(engine, { dt, f, input_tensor });
1211     auto scale_mem = memory::allocate(engine, { dt, of, scale_tensor });
1212     topology.add(input_layout("input", input_mem.get_layout()));
1213     topology.add(input_layout("scale_input", scale_mem.get_layout()));
1214
1215     if (pass_bias)
1216     {
1217         auto bias_mem = memory::allocate(engine, { dt, f, bias_tensor });
1218         topology.add(input_layout("bias_input", bias_mem.get_layout()));
1219
1220         topology.add(scale("scale", "input", "scale_input", "bias_input" ));
1221     }
1222     else
1223     {
1224         topology.add(scale("scale", "input", "scale_input"));
1225     }
1226 //TODO: this will be supported after the API change
1227 //    else
1228 //    {
1229 //        assert(!pass_bias);
1230 //
1231 //        topology.add(scale("scale", "input", "scale_input"));
1232 //    }
1233
1234     return network(engine, topology);
1235 }
1236
1237 TEST(NegativeScaleTest, TestAll) {
1238     auto d = data_types::f32;
1239     auto f = format::bfyx;
1240     auto of = format::yxfb;
1241
1242     std::vector<int> t { 3, 4, 5, 6 };
1243     std::vector<int> t2 { 5, 6, 4, 3 };
1244
1245     // broadcast rules mean that either the dim size is equal to input dim or is 1
1246     std::vector<std::vector<int>> good_ts =
1247     {
1248         { 1, 4, 5, 6 }, { 3, 1, 5, 6 }, { 3, 4, 1, 6 }, { 3, 4, 5, 1 },
1249         { 1, 1, 5, 6 }, { 1, 4, 1, 6 }, { 1, 4, 5, 1 }, { 3, 1, 1, 6 }, { 3, 1, 5, 1 }, { 3, 4, 1, 1 },
1250         { 1, 1, 1, 6 }, { 1, 1, 5, 1 }, { 1, 4, 1, 1 }, { 3, 1, 1, 1 }
1251     };
1252     std::vector<std::vector<int>> bad_ts = { { 2, 4, 5, 6 }, { 3, 2, 5, 6 }, { 3, 4, 2, 6 }, { 3, 4, 5, 2 } };
1253
1254     //TODO: should be ASSERT_THROW(statement, exception_type) - but what exception type?
1255     ASSERT_ANY_THROW(setup_scale_network(d, { }, { }, { }, f, of, false));
1256     ASSERT_ANY_THROW(setup_scale_network(d, { }, { }, { }, f, of, true));
1257
1258     ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, true));
1259     ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, false));
1260
1261     // make sure that it's the input that's masked in the scale/bias with a "1", not ther other way around
1262     for (const auto & good : good_ts)
1263     {
1264         ASSERT_ANY_THROW(setup_scale_network(d, tensor(good), tensor(t), tensor(t), f, of, true));
1265     }
1266
1267     // sizes must either be equal to input or at most have
1268     for (const auto & bad : bad_ts)
1269     {
1270         ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(t), f, of, true));
1271         ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t), tensor(bad), f, of, true));
1272
1273         for (const auto & good : good_ts)
1274         {
1275             ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(good), f, of, true));
1276             ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good), tensor(bad), f, of, true));
1277         }
1278     }
1279
1280     // we expect the broadcast mask to be identical for scale and bias, when present
1281     for (unsigned i = 0; i < good_ts.size(); ++i)
1282     for (unsigned j = 0; j < good_ts.size(); ++j)
1283         if (i != j)
1284         {
1285             ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good_ts[i]), tensor(good_ts[j]), f, of, true));
1286         }
1287
1288 }
1289
1290 //////////////////////////////////////////////////////////////////////////////
1291 //                                                                          //
1292 //                      Exhaustive Positive Matrix tests                    //
1293 //                                                                          //
1294 //////////////////////////////////////////////////////////////////////////////
1295
1296 using namespace cldnn;
1297
1298 class scale_test : public tests::generic_test
1299 {
1300 public:
1301     static void TearDownTestCase()
1302     {
1303         all_generic_params.clear();
1304         all_layer_params.clear();
1305     }
1306
1307     //TODO: use an enum instead of int i
1308     static std::vector<cldnn::primitive*> generate_specific_test_params(int variant)
1309     {
1310         std::vector<cldnn::primitive*> all_layer_params;
1311
1312         switch(variant)
1313         {
1314             case 0: all_layer_params.push_back(new scale("scale", "input0", "input1")); break;
1315             case 1: all_layer_params.push_back(new scale("scale", "input0", "input1", "input2")); break;
1316                     //    case 3: all_layer_params.push_back(new scale("scale", "input0", "input1", true));    // This case should be checked by negative_scale_test
1317                     //    case 4: all_layer_params.push_back(new scale("scale", "input0", "input1", false));    // This case should be checked by negative_scale_test
1318             default: assert(0);
1319         }
1320
1321         return all_layer_params;
1322     }
1323
1324     static std::vector<tests::test_params*> generate_generic_test_params(int variant)
1325     {
1326         assert(!variant || variant == 1);
1327
1328         std::vector<tests::test_params*> all_generic_params;
1329
1330         auto data_types = test_data_types();
1331
1332         for (cldnn::data_types dt : data_types)
1333         for (tensor & t : test_input_sizes)
1334         {
1335             std::vector<std::vector<int>> attempted_dims;
1336
1337             for (int32_t b : test_batch_sizes)
1338             for (auto f : test_feature_sizes)
1339             for (int mask = 0; mask < 16; ++mask)    //TODO: do we want to restrict it to some smaller subset like for (auto mask : { 0, 1, 3, 7, 15, 5, 10})? the problem is that because of the layout we might miss some interesting combinations since this is effectively hardcoded int he kernels
1340             {
1341                 const int w = t.spatial[0];
1342                 const int h = t.spatial[1];
1343
1344                 const auto mb = mask & 0x8 ? b : 1;
1345                 const auto mf = mask & 0x4 ? f : 1;
1346                 const auto mh = mask & 0x2 ? h : 1;
1347                 const auto mw = mask & 0x1 ? w : 1;
1348
1349                 // avoid adding test cases with different masks leading to the same dimensions
1350                 if(attempted_dims.end() == std::find_if(attempted_dims.begin(), attempted_dims.end(), [=](const std::vector<int> & arr) { return arr[0] == mb && arr[1] == mf && arr[2] == mh && arr[3] == mw; }))
1351                 {
1352                     std::vector<int> tmp { mb, mf, mh, mw };
1353                     attempted_dims.push_back(tmp);
1354
1355                     test_params * tp = new test_params();
1356                     tp->data_type = dt;
1357
1358                     tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  b, f, w, h  )));
1359                     tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  mb, mf, mw, mh  )));
1360                     if (variant)
1361                             tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor(  mb, mf, mw, mh  )));
1362
1363                     all_generic_params.emplace_back(tp);
1364                 }
1365             }
1366         }
1367
1368         return all_generic_params;
1369     }
1370
1371     static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params()
1372     {
1373         std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
1374
1375         for (int variant = 0; variant <= 1; ++variant)
1376         {
1377             auto tpv = generate_generic_test_params(variant);
1378             auto pv = generate_specific_test_params(variant);
1379
1380             for (auto & tp : tpv)
1381                 all_generic_params.emplace_back(tp);
1382
1383             for (auto & p : pv)
1384                 all_layer_params.emplace_back(p);
1385
1386             for (auto & tp : tpv)
1387             for (auto & p: pv)
1388                 res.emplace_back(tp, p);
1389         }
1390
1391         return res;
1392     }
1393
1394     virtual bool is_format_supported(cldnn::format format) override
1395     {
1396         return format == cldnn_format_type::cldnn_format_bfyx;
1397     }
1398
1399     template<typename Type>
1400     memory generate_reference_typed(const std::vector<memory> & inputs)
1401     {
1402         assert(inputs.size() == 3 || inputs.size() == 2);
1403         const bool bias_input_present = inputs.size() == 3;
1404
1405         const memory & input = inputs[0];
1406         const memory & scale = inputs[1];
1407         const memory * bias = bias_input_present ? &inputs[2] : nullptr;
1408         assert(!bias_input_present || bias);
1409
1410         //Output is bfyx
1411         auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, cldnn::format::bfyx, input.get_layout().size ));
1412
1413         const auto in0_mem = input.pointer<Type>();
1414         const auto in1_mem = scale.pointer<Type>();
1415         const auto in2_mem_ptr = bias ? std::make_shared<pointer<Type>>(*bias) : nullptr;
1416         const Type * const in2_mem = in2_mem_ptr ? in2_mem_ptr->data() : nullptr; //TODO: is the condition needed or is it nullptr anyway?
1417         auto out_mem = output.pointer<Type>();
1418
1419         const auto input_sizes = input.get_layout().size.sizes(cldnn::format::bfyx);
1420
1421         const int in0_b = input_sizes[0];
1422         const int in0_f = input_sizes[1];
1423         const int in0_h = input_sizes[2];
1424         const int in0_w = input_sizes[3];
1425
1426         { // asserting dims
1427             const auto output_sizes = output.get_layout().size.sizes(cldnn::format::bfyx);
1428             const int out_b = output_sizes[0]; (void) out_b;
1429             const int out_f = output_sizes[1]; (void) out_f;
1430             const int out_h = output_sizes[2]; (void) out_h;
1431             const int out_w = output_sizes[3]; (void) out_w;
1432
1433             const auto scale_sizes = scale.get_layout().size.sizes(cldnn::format::bfyx);
1434             const int in1_b = scale_sizes[0]; (void) in1_b;
1435             const int in1_f = scale_sizes[1]; (void) in1_f;
1436             const int in1_h = scale_sizes[2]; (void) in1_h;
1437             const int in1_w = scale_sizes[3]; (void) in1_w;
1438             // input and output dims must match
1439             assert(in0_b == out_b && in0_f == out_f && in0_h == out_h && in0_w == out_w);
1440
1441             // scale/bias dims must be equal to in/out or be 1 for broadcast
1442             assert(in1_b == 1 || in1_b == in0_b);
1443             assert(in1_f == 1 || in1_f == in0_f);
1444             assert(in1_h == 1 || in1_h == in0_h);
1445             assert(in1_w == 1 || in1_w == in0_w);
1446
1447             if (bias)
1448             {
1449                 const auto bias_sizes = bias->get_layout().size.sizes(cldnn::format::bfyx);
1450                 const int in2_b = bias_sizes[0]; (void) in2_b;
1451                 const int in2_f = bias_sizes[1]; (void) in2_f;
1452                 const int in2_h = bias_sizes[2]; (void) in2_h;
1453                 const int in2_w = bias_sizes[3]; (void) in2_w;
1454
1455                 // scale/bias dims must be equal to in/out or be 1 for broadcast
1456                 assert(in2_b == 1 || in2_b == in1_b);
1457                 assert(in2_b == 1 || in2_f == in1_f);
1458                 assert(in2_b == 1 || in2_h == in1_h);
1459                 assert(in2_b == 1 || in2_w == in1_w);
1460             }
1461         }
1462
1463         const auto input_desc = get_linear_memory_desc(input.get_layout());
1464         const auto output_desc = get_linear_memory_desc(output.get_layout());
1465         const auto scale_desc = get_linear_memory_desc(scale.get_layout());
1466         const auto bias_desc =
1467             bias ?
1468             get_linear_memory_desc(bias->get_layout()) :
1469             memory_desc();
1470
1471         for (int n = 0; n < in0_b; ++n)
1472         for (int c = 0; c < in0_f; ++c)
1473         for (int y = 0; y < in0_h; ++y)
1474         for (int x = 0; x < in0_w; ++x)
1475         {
1476             const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
1477             const size_t in1_idx = get_linear_index_with_broadcast(scale.get_layout(), n, c, y, x, scale_desc);
1478             const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, output_desc);
1479
1480             out_mem[out_idx] = in0_mem[in0_idx] * in1_mem[in1_idx];
1481
1482             if (bias)
1483             {
1484                 const size_t in2_idx = get_linear_index_with_broadcast(bias->get_layout(), n, c, y, x, bias_desc);
1485                 out_mem[out_idx] += in2_mem[in2_idx];
1486             }
1487         }
1488         return output;
1489     }
1490
1491     virtual memory generate_reference(const std::vector<memory> & inputs) override
1492     {
1493         if (generic_params->data_type == data_types::f32)
1494         {
1495             return generate_reference_typed<float>(inputs);
1496         }
1497         else
1498         {
1499             return generate_reference_typed<FLOAT16>(inputs);
1500         }
1501     }
1502
1503     static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
1504     {
1505         std::stringstream res;
1506
1507         const auto & p = std::get<0>(info.param);
1508
1509         assert (p->data_type == data_types::f32 ||
1510                 p->data_type == data_types::f16);
1511
1512         res << info.index
1513             << "_" << (p->data_type == data_types::f32 ? "f32" : "f16");
1514
1515         for (unsigned i = 0; i < p->input_layouts.size(); ++i)
1516         {
1517             if (i == 0) res << "_Input";
1518             if (i == 1) res << "_ScaleInput";
1519             if (i == 2) res << "_BiasInput";
1520
1521             const auto chans = format::traits(p->fmt).order;
1522
1523             for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
1524             {
1525                 res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
1526             }
1527         }
1528         return res.str();
1529     }
1530
1531 private:
1532     static std::vector<std::unique_ptr<tests::test_params>> all_generic_params;
1533     static std::vector<std::unique_ptr<cldnn::primitive>> all_layer_params;
1534 };
1535
1536 std::vector<std::unique_ptr<cldnn::primitive>> scale_test::all_layer_params = {};
1537 std::vector<std::unique_ptr<tests::test_params>> scale_test::all_generic_params = {};
1538
1539 TEST_P(scale_test, SCALE)
1540 {
1541     run_single_test();
1542 }
1543
1544 INSTANTIATE_TEST_CASE_P(DISABLED_SCALE,
1545     scale_test,
1546     ::testing::ValuesIn(scale_test::generate_all_test_params()),
1547     scale_test::custom_param_name);