2 // Copyright (c) 2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/scale.hpp"
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/network.hpp>
24 #include <api/CPP/engine.hpp>
25 #include "test_utils/test_utils.h"
26 #include "api/CPP/reorder.hpp"
30 using namespace cldnn;
31 using namespace tests;
// Element-wise scale with a scale tensor of identical size/layout (yxfb)
// to the input: expected output[i] = input[i] * scale[i] for every element.
33 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size) {
// Input (2x2x3x2, yxfb):
39 // f0: b0: 1 2 -10 b1: 0 0 -11
40 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
41 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
42 // f1: b0: 7 8 -16 b1: 12 8 -17
// Scale (same size as input):
45 // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5
46 // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1
47 // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5
48 // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2
52 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
53 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
56 topology.add(input_layout("input", input.get_layout()));
57 topology.add(input_layout("scale_input", scale_input.get_layout()));
58 topology.add(scale("scale", "input", "scale_input"));
// Values are laid out in yxfb linear order (b fastest, then f, x, y).
60 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
62 -10.f, -11.f, -12.f, -13.f,
65 -14.f, -15.f, -16.f, -17.f };
66 set_values(input, input_vec);
68 std::vector<float> scale_input_vec = {
69 0.1f, 0.3f, 1.1f, 1.3f,
70 0.2f, 0.4f, 1.2f, 1.4f,
71 0.25f, 0.5f, 1.25f, 1.5f,
72 0.6f, 0.8f, 1.6f, 1.8f,
73 0.7f, 0.9f, 1.7f, 1.9f,
74 0.75f, 1.f, 1.75f, 2.f
76 set_values(scale_input, scale_input_vec);
78 network network(engine, topology);
80 network.set_input_data("input", input);
81 network.set_input_data("scale_input", scale_input);
83 auto outputs = network.execute();
85 auto output = outputs.at("scale").get_memory();
86 auto output_ptr = output.pointer<float>();
// Same-size tensors: output can be compared element-by-element linearly.
88 for (unsigned int i = 0; i < input_vec.size(); ++i) {
89 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F);
// Same element-wise scale test as above, but both tensors use bfyx layout;
// expected output[i] = input[i] * scale[i] in bfyx linear order.
93 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bfyx) {
// Input (2x2x3x2, bfyx):
99 // f0: b0: 1 2 -10 b1: 0 0 -11
100 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
101 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
102 // f1: b0: 7 8 -16 b1: 12 8 -17
// Scale (same size as input):
105 // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5
106 // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1
107 // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5
108 // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2
112 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
113 auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
116 topology.add(input_layout("input", input.get_layout()));
117 topology.add(input_layout("scale_input", scale_input.get_layout()));
118 topology.add(scale("scale", "input", "scale_input"));
// Values in bfyx linear order (x fastest, then y, f, b).
120 std::vector<float> input_vec = {
121 1.f, 2.f, -10.f, 0.f, 0.f, -11.f,
122 3.f, 4.f, -14.f, 0.5f, -0.5f, -15.f,
123 5.f, 6.f, -12.f, 1.5f, 5.2f, -13.f,
124 7.f, 8.f, -16.f, 12.f, 8.f, -17.f
126 set_values(input, input_vec);
128 std::vector<float> scale_input_vec = {
129 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f,
130 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f,
131 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f,
132 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f
134 set_values(scale_input, scale_input_vec);
136 network network(engine, topology);
138 network.set_input_data("input", input);
139 network.set_input_data("scale_input", scale_input);
141 auto outputs = network.execute();
143 auto output = outputs.at("scale").get_memory();
144 auto output_ptr = output.pointer<float>();
// Same size and layout: compare element-by-element linearly.
146 for (unsigned int i = 0; i < input_vec.size(); ++i) {
147 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i], 1e-05F);
// Mixed-layout case: input is yxfb while the (same-sized) scale tensor is
// bfyx. The check below converts each (b,f,y,x) coordinate into the two
// different linear offsets to pair up corresponding elements.
151 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_scale_bfyx) {
// Input (yxfb):
157 // f0: b0: 1 2 -10 b1: 0 0 -11
158 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
159 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
160 // f1: b0: 7 8 -16 b1: 12 8 -17
// Scale (bfyx):
163 // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5
164 // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1
165 // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5
166 // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2
171 auto feature_num = 2;
175 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
176 auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size, y_size } });
179 topology.add(input_layout("input", input.get_layout()));
180 topology.add(input_layout("scale_input", scale_input.get_layout()));
181 topology.add(scale("scale", "input", "scale_input"));
183 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
185 -10.f, -11.f, -12.f, -13.f,
186 3.f, 0.5f, 7.f, 12.f,
187 4.f, -0.5f, 8.f, 8.f,
188 -14.f, -15.f, -16.f, -17.f };
189 set_values(input, input_vec);
// Scale values in bfyx linear order (x fastest).
191 std::vector<float> scale_input_vec = {
192 0.1f, 0.2f, 0.25f, 0.3f, 0.4f, 0.5f,
193 0.6f, 0.7f, 0.75f, 0.8f, 0.9f, 1.f,
194 1.1f, 1.2f, 1.25f, 1.3f, 1.4f, 1.5f,
195 1.6f, 1.7f, 1.75f, 1.8f, 1.9f, 2.f
197 set_values(scale_input, scale_input_vec);
199 network network(engine, topology);
201 network.set_input_data("input", input);
202 network.set_input_data("scale_input", scale_input);
204 auto outputs = network.execute();
206 auto output = outputs.at("scale").get_memory();
207 auto output_ptr = output.pointer<float>();
209 for (int j = 0; j < feature_num; ++j) { //F
210 for (int i = 0; i < batch_num; ++i) { //B
211 for (int k = 0; k < y_size; ++k) { //Y
212 for (int l = 0; l < x_size; ++l) { //X
// linear_id: yxfb offset (b fastest); linear_id_scale: bfyx offset (x fastest).
213 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
214 int linear_id_scale = l + x_size * (k + y_size * (j + i * feature_num));
215 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Scale with the optional bias input, all three tensors the same size and
// layout (yxfb): expected output[i] = input[i] * scale[i] + bias[i].
222 TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bias_term) {
// Input:
229 // f0: b0: 1 2 -10 b1: 0 0 -11
230 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
231 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
232 // f1: b0: 7 8 -16 b1: 12 8 -17
// Scale:
235 // f0: b0: 0.1 0.2 0.25 b1: 0.3 0.4 0.5
236 // f0: b0: 0.6 0.7 0.75 b1: 0.8 0.9 1
237 // f1: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5
238 // f1: b0: 1.6 1.7 1.75 b1: 1.8 1.9 2
// Bias:
241 // f0: b0: 1.1 1.2 1.25 b1: 1.3 1.4 1.5
242 // f0: b0: 2.6 2.7 2.75 b1: 2.8 2.9 2
243 // f1: b0: 3.1 3.2 3.25 b1: 3.3 3.4 3.5
244 // f1: b0: 4.6 4.7 4.75 b1: 4.8 4.9 4
248 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
249 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
250 auto bias = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
253 topology.add(input_layout("input", input.get_layout()));
254 topology.add(input_layout("scale_input", scale_input.get_layout()));
255 topology.add(input_layout("bias", bias.get_layout()));
// Third primitive argument enables the bias term.
256 topology.add(scale("scale", "input", "scale_input", "bias"));
258 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
260 -10.f, -11.f, -12.f, -13.f,
261 3.f, 0.5f, 7.f, 12.f,
262 4.f, -0.5f, 8.f, 8.f,
263 -14.f, -15.f, -16.f, -17.f };
264 set_values(input, input_vec);
266 std::vector<float> scale_input_vec = {
267 0.1f, 0.3f, 1.1f, 1.3f,
268 0.2f, 0.4f, 1.2f, 1.4f,
269 0.25f, 0.5f, 1.25f, 1.5f,
270 0.6f, 0.8f, 1.6f, 1.8f,
271 0.7f, 0.9f, 1.7f, 1.9f,
272 0.75f, 1.f, 1.75f, 2.f
274 set_values(scale_input, scale_input_vec);
276 std::vector<float> bias_vec = {
277 1.1f, 2.3f, 3.1f, 4.3f,
278 1.2f, 2.4f, 3.2f, 4.4f,
279 1.25f, 2.5f, 3.25f, 4.5f,
280 1.6f, 2.8f, 3.6f, 4.8f,
281 1.7f, 2.9f, 3.7f, 4.9f,
282 1.75f, 2.f, 3.75f, 4.f
284 set_values(bias, bias_vec);
286 network network(engine, topology);
288 network.set_input_data("input", input);
289 network.set_input_data("scale_input", scale_input);
290 network.set_input_data("bias", bias);
292 auto outputs = network.execute();
294 auto output = outputs.at("scale").get_memory();
295 auto output_ptr = output.pointer<float>();
// Same-size tensors: compare linearly, checking the fused multiply-add.
297 for (unsigned int i = 0; i < input_vec.size(); ++i) {
298 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_input_vec[i] + bias_vec[i], 1e-05F);
// Broadcast case: the scale tensor is a single 1x1x1x1 scalar that is
// applied to every element of the input (linear_id_scale is always 0).
302 TEST(scale_gpu, basic_in2x3x2x2_scale_scalar) {
// Input (yxfb):
308 // f0: b0: 1 2 -10 b1: 0 0 -11
309 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
310 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
311 // f1: b0: 7 8 -16 b1: 12 8 -17
319 auto feature_num = 2;
323 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Scalar scale: all dims are 1.
324 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
327 topology.add(input_layout("input", input.get_layout()));
328 topology.add(input_layout("scale_input", scale_input.get_layout()));
329 topology.add(scale("scale", "input", "scale_input"));
331 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
333 -10.f, -11.f, -12.f, -13.f,
334 3.f, 0.5f, 7.f, 12.f,
335 4.f, -0.5f, 8.f, 8.f,
336 -14.f, -15.f, -16.f, -17.f };
337 set_values(input, input_vec);
// Holds the single scalar scale value.
339 std::vector<float> scale_input_vec = {
342 set_values(scale_input, scale_input_vec);
344 network network(engine, topology);
346 network.set_input_data("input", input);
347 network.set_input_data("scale_input", scale_input);
349 auto outputs = network.execute();
351 auto output = outputs.at("scale").get_memory();
352 auto output_ptr = output.pointer<float>();
354 for (int j = 0; j < feature_num; ++j) { //F
355 for (int i = 0; i < batch_num; ++i) { //B
356 for (int k = 0; k < y_size; ++k) { //Y
357 for (int l = 0; l < x_size; ++l) { //X
358 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Every element uses the same scalar scale.
359 int linear_id_scale = 0;
360 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast along Y only: scale tensor is 1x1x1xY, so one scale value is
// shared by all batches/features/x-positions of a given row (index k).
367 TEST(scale_gpu, basic_in2x3x2x2_scale_y) {
// Input (yxfb):
373 // f0: b0: 1 2 -10 b1: 0 0 -11
374 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
375 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
376 // f1: b0: 7 8 -16 b1: 12 8 -17
384 auto feature_num = 2;
388 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Per-Y scale: only the y dimension matches the input.
389 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,1,1,y_size } });
392 topology.add(input_layout("input", input.get_layout()));
393 topology.add(input_layout("scale_input", scale_input.get_layout()));
394 topology.add(scale("scale", "input", "scale_input"));
396 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
398 -10.f, -11.f, -12.f, -13.f,
399 3.f, 0.5f, 7.f, 12.f,
400 4.f, -0.5f, 8.f, 8.f,
401 -14.f, -15.f, -16.f, -17.f };
402 set_values(input, input_vec);
// One scale value per y row.
404 std::vector<float> scale_input_vec = {
408 set_values(scale_input, scale_input_vec);
410 network network(engine, topology);
412 network.set_input_data("input", input);
413 network.set_input_data("scale_input", scale_input);
415 auto outputs = network.execute();
417 auto output = outputs.at("scale").get_memory();
418 auto output_ptr = output.pointer<float>();
420 for (int j = 0; j < feature_num; ++j) { //F
421 for (int i = 0; i < batch_num; ++i) { //B
422 for (int k = 0; k < y_size; ++k) { //Y
423 for (int l = 0; l < x_size; ++l) { //X
424 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Scale index depends only on the y coordinate.
425 int linear_id_scale = k;
426 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast over spatial dims: scale tensor is BxFx1x1, giving one scale
// value per (feature, batch) pair, shared across all x/y positions.
433 TEST(scale_gpu, basic_in2x3x2x2_scale_fb) {
// Input (yxfb):
439 // f0: b0: 1 2 -10 b1: 0 0 -11
440 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
441 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
442 // f1: b0: 7 8 -16 b1: 12 8 -17
444 // Scale: per feature per batch
445 // f0b0: 0.1 f0b1: 0.2
446 // f1b0: 0.5 f1b1: 2.0
451 auto feature_num = 2;
455 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
456 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, 1, 1 } });
459 topology.add(input_layout("input", input.get_layout()));
460 topology.add(input_layout("scale_input", scale_input.get_layout()));
461 topology.add(scale("scale", "input", "scale_input"));
463 std::vector<float> input_vec = {
466 -10.f, -11.f, -12.f, -13.f,
467 3.f, 0.5f, 7.f, 12.f,
468 4.f, -0.5f, 8.f, 8.f,
469 -14.f, -15.f, -16.f, -17.f };
470 set_values(input, input_vec);
// yxfb order with b fastest: f0b0, f0b1, f1b0, f1b1.
472 std::vector<float> scale_input_vec = {
473 0.1f, 0.2f, 0.5f, 2.0f,
475 set_values(scale_input, scale_input_vec);
477 network network(engine, topology);
479 network.set_input_data("input", input);
480 network.set_input_data("scale_input", scale_input);
482 auto outputs = network.execute();
484 auto output = outputs.at("scale").get_memory();
485 auto output_ptr = output.pointer<float>();
487 for (int j = 0; j < feature_num; ++j) { //F
488 for (int i = 0; i < batch_num; ++i) { //B
489 for (int k = 0; k < y_size; ++k) { //Y
490 for (int l = 0; l < x_size; ++l) { //X
491 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// NOTE(review): for yxfb this stride would normally be batch_num (b fastest);
// it only works here because batch_num == feature_num (== 2) — verify.
492 int linear_id_scale = i + feature_num * j;
493 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast per feature: scale tensor is 1xFx1x1, one scale value per
// feature map shared across all batches and spatial positions.
500 TEST(scale_gpu, basic_in2x3x2x2_scale_f) {
// Input (yxfb):
506 // f0: b0: 1 2 -10 b1: 0 0 -11
507 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
508 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
509 // f1: b0: 7 8 -16 b1: 12 8 -17
511 // Scale: per feature
512 // f0bx: 0.1 f1bx: 0.2
517 auto feature_num = 2;
521 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Per-feature scale: only the f dimension matches the input.
522 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, 1, 1 } });
525 topology.add(input_layout("input", input.get_layout()));
526 topology.add(input_layout("scale_input", scale_input.get_layout()));
527 topology.add(scale("scale", "input", "scale_input"));
529 std::vector<float> input_vec = {
532 -10.f, -11.f, -12.f, -13.f,
533 3.f, 0.5f, 7.f, 12.f,
534 4.f, -0.5f, 8.f, 8.f,
535 -14.f, -15.f, -16.f, -17.f };
536 set_values(input, input_vec);
// One scale value per feature map.
538 std::vector<float> scale_input_vec = {
542 set_values(scale_input, scale_input_vec);
544 network network(engine, topology);
546 network.set_input_data("input", input);
547 network.set_input_data("scale_input", scale_input);
549 auto outputs = network.execute();
551 auto output = outputs.at("scale").get_memory();
552 auto output_ptr = output.pointer<float>();
554 for (int j = 0; j < feature_num; ++j) { //F
555 for (int i = 0; i < batch_num; ++i) { //B
556 for (int k = 0; k < y_size; ++k) { //Y
557 for (int l = 0; l < x_size; ++l) { //X
558 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Scale index depends only on the feature coordinate.
559 int linear_id_scale = j;
560 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast along X only: scale tensor is 1x1xXx1, one scale value per
// column shared across all batches, features and y rows.
567 TEST(scale_gpu, basic_in2x3x2x2_scale_x) {
// Input (yxfb):
573 // f0: b0: 1 2 -10 b1: 0 0 -11
574 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
575 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
576 // f1: b0: 7 8 -16 b1: 12 8 -17
584 auto feature_num = 2;
588 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Per-X scale: only the x dimension matches the input.
589 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, 1 } });
592 topology.add(input_layout("input", input.get_layout()));
593 topology.add(input_layout("scale_input", scale_input.get_layout()));
594 topology.add(scale("scale", "input", "scale_input"));
596 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
598 -10.f, -11.f, -12.f, -13.f,
599 3.f, 0.5f, 7.f, 12.f,
600 4.f, -0.5f, 8.f, 8.f,
601 -14.f, -15.f, -16.f, -17.f };
602 set_values(input, input_vec);
// One scale value per x column.
604 std::vector<float> scale_input_vec = {
609 set_values(scale_input, scale_input_vec);
611 network network(engine, topology);
613 network.set_input_data("input", input);
614 network.set_input_data("scale_input", scale_input);
616 auto outputs = network.execute();
618 auto output = outputs.at("scale").get_memory();
619 auto output_ptr = output.pointer<float>();
621 for (int j = 0; j < feature_num; ++j) { //F
622 for (int i = 0; i < batch_num; ++i) { //B
623 for (int k = 0; k < y_size; ++k) { //Y
624 for (int l = 0; l < x_size; ++l) { //X
625 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Scale index depends only on the x coordinate.
626 int linear_id_scale = l;
627 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast over batch and feature: scale tensor is 1x1xXxY, one scale
// value per spatial (x, y) position shared across all batches/features.
634 TEST(scale_gpu, basic_in2x3x2x2_scale_xy) {
// Input (yxfb):
640 // f0: b0: 1 2 -10 b1: 0 0 -11
641 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
642 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
643 // f1: b0: 7 8 -16 b1: 12 8 -17
652 auto feature_num = 2;
656 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Per-(x, y) scale: spatial dims match the input, b and f are 1.
657 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, x_size, y_size } });
660 topology.add(input_layout("input", input.get_layout()));
661 topology.add(input_layout("scale_input", scale_input.get_layout()));
662 topology.add(scale("scale", "input", "scale_input"));
664 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
666 -10.f, -11.f, -12.f, -13.f,
667 3.f, 0.5f, 7.f, 12.f,
668 4.f, -0.5f, 8.f, 8.f,
669 -14.f, -15.f, -16.f, -17.f };
670 set_values(input, input_vec);
// One scale value per (x, y) position.
672 std::vector<float> scale_input_vec = {
680 set_values(scale_input, scale_input_vec);
682 network network(engine, topology);
684 network.set_input_data("input", input);
685 network.set_input_data("scale_input", scale_input);
687 auto outputs = network.execute();
689 auto output = outputs.at("scale").get_memory();
690 auto output_ptr = output.pointer<float>();
692 for (int j = 0; j < feature_num; ++j) { //F
693 for (int i = 0; i < batch_num; ++i) { //B
694 for (int k = 0; k < y_size; ++k) { //Y
695 for (int l = 0; l < x_size; ++l) { //X
696 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Scale index depends only on the spatial coordinates.
697 int linear_id_scale = l + x_size * k;
698 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Broadcast over batch only: scale tensor is 1xFxXxY, one scale value per
// (feature, x, y) shared across all batches.
705 TEST(scale_gpu, basic_in2x3x2x2_scale_batch1) {
// Input (yxfb):
711 // f0: b0: 1 2 -10 b1: 0 0 -11
712 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
713 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
714 // f1: b0: 7 8 -16 b1: 12 8 -17
// Scale (batch dim broadcast):
717 // f0: b0: 0.1 0.2 0.25
718 // f0: b0: 0.6 0.7 0.75
719 // f1: b0: 1.1 1.2 1.25
720 // f1: b0: 1.6 1.7 1.75
725 auto feature_num = 2;
729 auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ batch_num, feature_num, x_size, y_size } });
// Batch dim of the scale tensor is 1; all other dims match the input.
730 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, feature_num, x_size, y_size } });
733 topology.add(input_layout("input", input.get_layout()));
734 topology.add(input_layout("scale_input", scale_input.get_layout()));
735 topology.add(scale("scale", "input", "scale_input"));
737 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f,
739 -10.f, -11.f, -12.f, -13.f,
740 3.f, 0.5f, 7.f, 12.f,
741 4.f, -0.5f, 8.f, 8.f,
742 -14.f, -15.f, -16.f, -17.f };
743 set_values(input, input_vec);
// One scale value per (f, x, y) triple.
745 std::vector<float> scale_input_vec = {
753 set_values(scale_input, scale_input_vec);
755 network network(engine, topology);
757 network.set_input_data("input", input);
758 network.set_input_data("scale_input", scale_input);
760 auto outputs = network.execute();
762 auto output = outputs.at("scale").get_memory();
763 auto output_ptr = output.pointer<float>();
765 for (int j = 0; j < feature_num; ++j) { //F
766 for (int i = 0; i < batch_num; ++i) { //B
767 for (int k = 0; k < y_size; ++k) { //Y
768 for (int l = 0; l < x_size; ++l) { //X
769 int linear_id = i + batch_num * (j + feature_num * (l + x_size * k));
// Same yxfb offset as the input but without the batch term.
770 int linear_id_scale = j + feature_num * (l + x_size * k);
771 EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05F);
// Scale + bias on a flat 2x1x3x1 bfyx tensor where input, scale and bias
// all share the same size: output[i] = input[i] * scale[i] + bias[i].
778 TEST(scale_gpu, basic_in2x3_scale_same_size_bx) {
798 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
799 auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
800 auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
803 topology.add(input_layout("input", input.get_layout()));
// NOTE(review): bias_input is declared with scale_input's layout — looks
// like a copy-paste; harmless here because both layouts are identical.
804 topology.add(input_layout("scale_input", scale_input.get_layout()));
805 topology.add(input_layout("bias_input", scale_input.get_layout()));
806 topology.add(scale("scale", "input", "scale_input", "bias_input"));
808 std::vector<float> input_vec = {
812 set_values(input, input_vec);
814 std::vector<float> scale_vec = {
818 set_values(scale_input, scale_vec);
820 std::vector<float> bias_vec = {
824 set_values(bias_input, bias_vec);
826 network network(engine, topology);
828 network.set_input_data("input", input);
829 network.set_input_data("scale_input", scale_input);
830 network.set_input_data("bias_input", bias_input);
832 auto outputs = network.execute();
834 auto output = outputs.at("scale").get_memory();
835 auto output_ptr = output.pointer<float>();
// Same-size tensors: compare linearly.
837 for (unsigned int i = 0; i < input_vec.size(); ++i) {
838 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F);
// Same as the _bx variant but in yxfb layout (3x1x2x1): scale + bias with
// all tensors the same size; output[i] = input[i] * scale[i] + bias[i].
842 TEST(scale_gpu, basic_in2x3_scale_same_size_xb) {
862 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
863 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
864 auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
867 topology.add(input_layout("input", input.get_layout()));
// NOTE(review): bias_input reuses scale_input's layout (copy-paste);
// harmless here since both layouts are identical.
868 topology.add(input_layout("scale_input", scale_input.get_layout()));
869 topology.add(input_layout("bias_input", scale_input.get_layout()));
870 topology.add(scale("scale", "input", "scale_input", "bias_input"));
872 std::vector<float> input_vec = {
876 set_values(input, input_vec);
878 std::vector<float> scale_vec = {
882 set_values(scale_input, scale_vec);
884 std::vector<float> bias_vec = {
888 set_values(bias_input, bias_vec);
890 network network(engine, topology);
892 network.set_input_data("input", input);
893 network.set_input_data("scale_input", scale_input);
894 network.set_input_data("bias_input", bias_input);
896 auto outputs = network.execute();
898 auto output = outputs.at("scale").get_memory();
899 auto output_ptr = output.pointer<float>();
// Same-size tensors: compare linearly.
901 for (unsigned int i = 0; i < input_vec.size(); ++i) {
902 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i] + bias_vec[i], 1e-05F);
// Scalar broadcast with bias (bfyx): scale and bias are both 1x1x1x1, so
// every output element is input[i] * scale[0] + bias[0].
906 TEST(scale_gpu, basic_in2x3_scale_single_value_bx) {
924 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
925 auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
926 auto bias_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
929 topology.add(input_layout("input", input.get_layout()));
// NOTE(review): bias_input reuses scale_input's layout (copy-paste);
// harmless here since both are 1x1x1x1.
930 topology.add(input_layout("scale_input", scale_input.get_layout()));
931 topology.add(input_layout("bias_input", scale_input.get_layout()));
932 topology.add(scale("scale", "input", "scale_input", "bias_input"));
934 std::vector<float> input_vec = {
938 set_values(input, input_vec);
940 std::vector<float> scale_vec = {
943 set_values(scale_input, scale_vec);
945 std::vector<float> bias_vec = {
948 set_values(bias_input, bias_vec);
950 network network(engine, topology);
952 network.set_input_data("input", input);
953 network.set_input_data("scale_input", scale_input);
954 network.set_input_data("bias_input", bias_input);
956 auto outputs = network.execute();
958 auto output = outputs.at("scale").get_memory();
959 auto output_ptr = output.pointer<float>();
// Single scalar scale/bias applied to every element.
961 for (unsigned int i = 0; i < input_vec.size(); ++i) {
962 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F);
// Scalar broadcast with bias (yxfb): scale and bias are both 1x1x1x1, so
// every output element is input[i] * scale[0] + bias[0].
966 TEST(scale_gpu, basic_in2x3_scale_single_value_xb) {
984 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
985 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
986 auto bias_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
989 topology.add(input_layout("input", input.get_layout()));
// NOTE(review): bias_input reuses scale_input's layout (copy-paste);
// harmless here since both are 1x1x1x1.
990 topology.add(input_layout("scale_input", scale_input.get_layout()));
991 topology.add(input_layout("bias_input", scale_input.get_layout()));
992 topology.add(scale("scale", "input", "scale_input", "bias_input"));
994 std::vector<float> input_vec = {
998 set_values(input, input_vec);
1000 std::vector<float> scale_vec = {
1003 set_values(scale_input, scale_vec);
1005 std::vector<float> bias_vec = {
1008 set_values(bias_input, bias_vec);
1010 network network(engine, topology);
1012 network.set_input_data("input", input);
1013 network.set_input_data("scale_input", scale_input);
1014 network.set_input_data("bias_input", bias_input);
1016 auto outputs = network.execute();
1018 auto output = outputs.at("scale").get_memory();
1019 auto output_ptr = output.pointer<float>();
// Single scalar scale/bias applied to every element.
1021 for (unsigned int i = 0; i < input_vec.size(); ++i) {
1022 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[0] + bias_vec[0], 1e-05F);
// Same-size scale without the optional bias input (bfyx layout):
// output[i] = input[i] * scale[i].
1026 TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_bx) {
1041 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
1042 auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
1045 topology.add(input_layout("input", input.get_layout()));
1046 topology.add(input_layout("scale_input", scale_input.get_layout()));
// Two-input form of scale: no bias primitive id passed.
1047 topology.add(scale("scale", "input", "scale_input"));
1049 std::vector<float> input_vec = {
1053 set_values(input, input_vec);
1055 std::vector<float> scale_vec = {
1059 set_values(scale_input, scale_vec);
1061 network network(engine, topology);
1063 network.set_input_data("input", input);
1064 network.set_input_data("scale_input", scale_input);
1066 auto outputs = network.execute();
1068 auto output = outputs.at("scale").get_memory();
1069 auto output_ptr = output.pointer<float>();
// Same-size tensors: compare linearly.
1071 for (unsigned int i = 0; i < input_vec.size(); ++i) {
1072 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F);
// Same-size scale without the optional bias input (yxfb layout):
// output[i] = input[i] * scale[i].
1076 TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_xb) {
1091 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
1092 auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
1095 topology.add(input_layout("input", input.get_layout()));
1096 topology.add(input_layout("scale_input", scale_input.get_layout()));
// Two-input form of scale: no bias primitive id passed.
1097 topology.add(scale("scale", "input", "scale_input"));
1099 std::vector<float> input_vec = {
1103 set_values(input, input_vec);
1105 std::vector<float> scale_vec = {
1109 set_values(scale_input, scale_vec);
1111 network network(engine, topology);
1113 network.set_input_data("input", input);
1114 network.set_input_data("scale_input", scale_input);
1116 auto outputs = network.execute();
1118 auto output = outputs.at("scale").get_memory();
1119 auto output_ptr = output.pointer<float>();
// Same-size tensors: compare linearly.
1121 for (unsigned int i = 0; i < input_vec.size(); ++i) {
1122 EXPECT_NEAR(output_ptr[i], input_vec[i] * scale_vec[i], 1e-05F);
// Verifies scale with padded input (via a padded reorder) and padded
// output, for both yxfb and bfyx layouts. The expected buffer is the 2x2
// product surrounded by the zero-filled 2x2 output padding.
1126 TEST(scale_gpu, basic_in2x3x2x2_scale_yxfb_bfyx_same_size_padding) {
1130 // Output Padding: 2x2
1131 // Input Padding: 2x1 (with reorder)
// Run the identical scenario in each tested layout.
1143 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
1145 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
1147 std::cout << "Testing format: " << format::order(*it) << std::endl;
1149 tensor input_tensor(1, 1, 2, 2);
1151 auto input = memory::allocate(engine, { data_types::f32, *it, input_tensor });
1152 auto scale_input = memory::allocate(engine, { data_types::f32, *it, input_tensor });
1155 topology.add(input_layout("input", input.get_layout()));
// Reorder adds 2x1 input padding before the scale primitive.
1156 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })))
;
1157 topology.add(input_layout("scale_input", scale_input.get_layout()));
// Scale itself produces 2x2 output padding.
1158 topology.add(scale("scale", "reorder", "scale_input", padding( { 0, 0, 2, 2 }, 0 )));
1160 std::vector<float> input_vec = { 1.f, 2.f, 3.f, 4.f };
1161 set_values(input, input_vec);
1163 std::vector<float> scale_input_vec = { 0.1f, 0.2f, 0.6f, 0.5f };
1164 set_values(scale_input, scale_input_vec);
// 6x6 padded output: 2x2 data block (input * scale) centered in zeros.
1166 std::vector<float> expected = {
1167 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1168 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1169 0.f, 0.f, 0.1f, 0.4f, 0.f, 0.f,
1170 0.f, 0.f, 1.8f, 2.0f, 0.f, 0.f,
1171 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1172 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1175 network network(engine, topology);
1177 network.set_input_data("input", input);
1178 network.set_input_data("scale_input", scale_input);
1180 auto outputs = network.execute();
1182 auto output = outputs.at("scale").get_memory();
1183 auto output_ptr = output.pointer<float>();
// Compare against the full padded buffer, padding included.
1185 for (unsigned int i = 0; i < expected.size(); ++i) {
1186 EXPECT_NEAR(output_ptr[i], expected[i], 1e-05F);
1190 //////////////////////////////////////////////////////////////////////////////
1192 // Exhaustive Negative Matrix tests //
1194 //////////////////////////////////////////////////////////////////////////////
1196 //TODO: this should be done using TEST_P or some equivalent construct
// Builds a minimal scale topology for the negative tests below and returns
// the constructed network. Invalid tensor combinations are expected to make
// network construction throw.
//   dt           - data type for all allocated memories
//   input_tensor - size of the primary input
//   scale_tensor - size of the scale input (allocated in format `of`)
//   bias_tensor  - size of the optional bias input (only used when pass_bias)
//   pass_bias    - whether to attach the bias input to the scale primitive
1197 static network setup_scale_network(
1198 const data_types dt,
1199 const tensor input_tensor,
1200 const tensor scale_tensor,
1201 const tensor bias_tensor,
1204 bool pass_bias //TODO: a WA for lack of std::optional<tensor> bias
1210 auto input_mem = memory::allocate(engine, { dt, f, input_tensor })
;
1211 auto scale_mem = memory::allocate(engine, { dt, of, scale_tensor });
1212 topology.add(input_layout("input", input_mem.get_layout()));
1213 topology.add(input_layout("scale_input", scale_mem.get_layout()));
// With bias: three-input scale primitive.
1217 auto bias_mem = memory::allocate(engine, { dt, f, bias_tensor });
1218 topology.add(input_layout("bias_input", bias_mem.get_layout()));
1220 topology.add(scale("scale", "input", "scale_input", "bias_input" ));
// Without bias: two-input scale primitive.
1224 topology.add(scale("scale", "input", "scale_input"));
1226 //TODO: this will be supported after the API change
1229 // assert(!pass_bias);
1231 // topology.add(scale("scale", "input", "scale_input"));
// Network construction performs validation; bad sizes throw here.
1234 return network(engine, topology);
// Negative matrix test: every setup below violates the scale primitive's
// broadcast rules (each scale/bias dim must equal the input dim or be 1),
// so constructing the network must throw.
1237 TEST(NegativeScaleTest, TestAll) {
1238 auto d = data_types::f32;
1239 auto f = format::bfyx;
1240 auto of = format::yxfb;
// t: reference input size; t2: a permutation of t (mismatched everywhere).
1242 std::vector<int> t { 3, 4, 5, 6 };
1243 std::vector<int> t2 { 5, 6, 4, 3 };
1245 // broadcast rules mean that either the dim size is equal to input dim or is 1
1246 std::vector<std::vector<int>> good_ts =
1248 { 1, 4, 5, 6 }, { 3, 1, 5, 6 }, { 3, 4, 1, 6 }, { 3, 4, 5, 1 },
1249 { 1, 1, 5, 6 }, { 1, 4, 1, 6 }, { 1, 4, 5, 1 }, { 3, 1, 1, 6 }, { 3, 1, 5, 1 }, { 3, 4, 1, 1 },
1250 { 1, 1, 1, 6 }, { 1, 1, 5, 1 }, { 1, 4, 1, 1 }, { 3, 1, 1, 1 }
1252 std::vector<std::vector<int>> bad_ts = { { 2, 4, 5, 6 }, { 3, 2, 5, 6 }, { 3, 4, 2, 6 }, { 3, 4, 5, 2 } };
1254 //TODO: should be ASSERT_THROW(statement, exception_type) - but what exception type?
// Empty tensors are invalid regardless of bias.
1255 ASSERT_ANY_THROW(setup_scale_network(d, { }, { }, { }, f, of, false));
1256 ASSERT_ANY_THROW(setup_scale_network(d, { }, { }, { }, f, of, true));
// Fully mismatched scale size must throw.
1258 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, true));
1259 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t2), tensor(t), f, of, false));
1261 // make sure that it's the input that's masked in the scale/bias with a "1", not the other way around
1262 for (const auto & good : good_ts)
1264 ASSERT_ANY_THROW(setup_scale_network(d, tensor(good), tensor(t), tensor(t), f, of, true));
1267 // scale/bias sizes must either equal the input dims or be 1 (broadcast); anything else must throw
1268 for (const auto & bad : bad_ts)
1270 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(t), f, of, true));
1271 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(t), tensor(bad), f, of, true));
1273 for (const auto & good : good_ts)
1275 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(bad), tensor(good), f, of, true));
1276 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good), tensor(bad), f, of, true));
1280 // we expect the broadcast mask to be identical for scale and bias, when present
1281 for (unsigned i = 0; i < good_ts.size(); ++i)
1282 for (unsigned j = 0; j < good_ts.size(); ++j)
1285 ASSERT_ANY_THROW(setup_scale_network(d, tensor(t), tensor(good_ts[i]), tensor(good_ts[j]), f, of, true));
1290 //////////////////////////////////////////////////////////////////////////////
1292 // Exhaustive Positive Matrix tests //
1294 //////////////////////////////////////////////////////////////////////////////
1296 using namespace cldnn;
1298 class scale_test : public tests::generic_test
1301 static void TearDownTestCase()
1303 all_generic_params.clear();
1304 all_layer_params.clear();
//TODO: use an enum instead of int i
// Builds the layer (primitive) variants under test:
//   variant 0 — scale without a bias input,
//   variant 1 — scale with a bias input ("input2").
// Returns raw pointers; ownership is handed to the static all_layer_params
// cache (vector of unique_ptr) in generate_all_test_params().
static std::vector<cldnn::primitive*> generate_specific_test_params(int variant)
std::vector<cldnn::primitive*> all_layer_params;
case 0: all_layer_params.push_back(new scale("scale", "input0", "input1")); break;
case 1: all_layer_params.push_back(new scale("scale", "input0", "input1", "input2")); break;
// case 3: all_layer_params.push_back(new scale("scale", "input0", "input1", true)); // This case should be checked by negative_scale_test
// case 4: all_layer_params.push_back(new scale("scale", "input0", "input1", false)); // This case should be checked by negative_scale_test
return all_layer_params;
// Builds the generic (data) parameter combinations for a variant
// (0 = scale only, 1 = scale + bias): for every data type, input size,
// batch and feature count, every 4-bit broadcast mask over
// (batch, feature, height, width) is attempted for the scale/bias layouts.
static std::vector<tests::test_params*> generate_generic_test_params(int variant)
assert(!variant || variant == 1);
std::vector<tests::test_params*> all_generic_params;
for (cldnn::data_types dt : test_data_types())
for (tensor & t : test_input_sizes)
// dimension tuples already generated for this input size; used to
// de-duplicate masks that collapse to identical scale/bias shapes
std::vector<std::vector<int>> attempted_dims;
for (int32_t b : test_batch_sizes)
for (auto f : test_feature_sizes)
for (int mask = 0; mask < 16; ++mask) //TODO: do we want to restrict it to some smaller subset like for (auto mask : { 0, 1, 3, 7, 15, 5, 10})? the problem is that because of the layout we might miss some interesting combinations since this is effectively hardcoded int he kernels
const int w = t.spatial[0];
const int h = t.spatial[1];
// mask bits 3..0 choose whether batch/feature/height/width keep the
// full extent or are collapsed to 1 (broadcast) in the scale/bias shape
const auto mb = mask & 0x8 ? b : 1;
const auto mf = mask & 0x4 ? f : 1;
const auto mh = mask & 0x2 ? h : 1;
const auto mw = mask & 0x1 ? w : 1;
// avoid adding test cases with different masks leading to the same dimensions
if(attempted_dims.end() == std::find_if(attempted_dims.begin(), attempted_dims.end(), [=](const std::vector<int> & arr) { return arr[0] == mb && arr[1] == mf && arr[2] == mh && arr[3] == mw; }))
std::vector<int> tmp { mb, mf, mh, mw };
attempted_dims.push_back(tmp);
// raw pointer; the static all_generic_params cache takes ownership
// (as unique_ptr) in generate_all_test_params()
test_params * tp = new test_params();
// input 0: full-size input; input 1: scale with the masked (broadcast) dims
tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( b, f, w, h )));
tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( mb, mf, mw, mh )));
// input 2 (bias) uses the same masked dims as the scale input
tp->input_layouts.push_back(cldnn::layout(tp->data_type, tp->fmt, cldnn::tensor( mb, mf, mw, mh )));
all_generic_params.emplace_back(tp);
return all_generic_params;
// Builds the full cross-product of generic params and layer params for both
// variants. The static caches take ownership of all created objects (so they
// outlive the parameterized tests); the returned tuples hold non-owning
// pointers for gtest instantiation.
static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_all_test_params()
std::vector<std::tuple<test_params*, cldnn::primitive*>> res;
for (int variant = 0; variant <= 1; ++variant)
auto tpv = generate_generic_test_params(variant);
auto pv = generate_specific_test_params(variant);
// hand ownership of the raw pointers to the static unique_ptr caches
for (auto & tp : tpv)
all_generic_params.emplace_back(tp);
all_layer_params.emplace_back(p);
// pair every generic param with every primitive of this variant
for (auto & tp : tpv)
res.emplace_back(tp, p);
1392 virtual bool is_format_supported(cldnn::format format) override
1394 return format == cldnn_format_type::cldnn_format_bfyx;
// CPU reference for the scale primitive:
//   out[n,c,y,x] = in0[n,c,y,x] * in1[...] (+ in2[...] when a bias input is
//   present), with scale/bias indices resolved via broadcast semantics
//   (get_linear_index_with_broadcast). Inputs: [input, scale] or
//   [input, scale, bias]. Returns a newly allocated bfyx memory.
template<typename Type>
memory generate_reference_typed(const std::vector<memory> & inputs)
assert(inputs.size() == 3 || inputs.size() == 2);
const bool bias_input_present = inputs.size() == 3;
const memory & input = inputs[0];
const memory & scale = inputs[1];
const memory * bias = bias_input_present ? &inputs[2] : nullptr;
assert(!bias_input_present || bias);
// output matches the input's data type and size, in bfyx layout
auto output = memory::allocate(engine, cldnn::layout(input.get_layout().data_type, cldnn::format::bfyx, input.get_layout().size ));
const auto in0_mem = input.pointer<Type>();
const auto in1_mem = scale.pointer<Type>();
// bias memory is only mapped when the bias input actually exists
const auto in2_mem_ptr = bias ? std::make_shared<pointer<Type>>(*bias) : nullptr;
const Type * const in2_mem = in2_mem_ptr ? in2_mem_ptr->data() : nullptr; //TODO: is the condition needed or is it nullptr anyway?
auto out_mem = output.pointer<Type>();
const auto input_sizes = input.get_layout().size.sizes(cldnn::format::bfyx);
const int in0_b = input_sizes[0];
const int in0_f = input_sizes[1];
const int in0_h = input_sizes[2];
const int in0_w = input_sizes[3];
const auto output_sizes = output.get_layout().size.sizes(cldnn::format::bfyx);
const int out_b = output_sizes[0]; (void) out_b;
const int out_f = output_sizes[1]; (void) out_f;
const int out_h = output_sizes[2]; (void) out_h;
const int out_w = output_sizes[3]; (void) out_w;
const auto scale_sizes = scale.get_layout().size.sizes(cldnn::format::bfyx);
const int in1_b = scale_sizes[0]; (void) in1_b;
const int in1_f = scale_sizes[1]; (void) in1_f;
const int in1_h = scale_sizes[2]; (void) in1_h;
const int in1_w = scale_sizes[3]; (void) in1_w;
// input and output dims must match
assert(in0_b == out_b && in0_f == out_f && in0_h == out_h && in0_w == out_w);
// scale/bias dims must be equal to in/out or be 1 for broadcast
assert(in1_b == 1 || in1_b == in0_b);
assert(in1_f == 1 || in1_f == in0_f);
assert(in1_h == 1 || in1_h == in0_h);
assert(in1_w == 1 || in1_w == in0_w);
const auto bias_sizes = bias->get_layout().size.sizes(cldnn::format::bfyx);
const int in2_b = bias_sizes[0]; (void) in2_b;
const int in2_f = bias_sizes[1]; (void) in2_f;
const int in2_h = bias_sizes[2]; (void) in2_h;
const int in2_w = bias_sizes[3]; (void) in2_w;
// scale/bias dims must be equal to in/out or be 1 for broadcast
// NOTE(review): the last three asserts below re-check in2_b instead of
// in2_f/in2_h/in2_w against 1 — this looks like a copy-paste error; they
// were presumably meant to be assert(in2_f == 1 || ...), etc. Confirm & fix.
assert(in2_b == 1 || in2_b == in1_b);
assert(in2_b == 1 || in2_f == in1_f);
assert(in2_b == 1 || in2_h == in1_h);
assert(in2_b == 1 || in2_w == in1_w);
// precompute linear-memory descriptors once, outside the element loop
const auto input_desc = get_linear_memory_desc(input.get_layout());
const auto output_desc = get_linear_memory_desc(output.get_layout());
const auto scale_desc = get_linear_memory_desc(scale.get_layout());
const auto bias_desc =
get_linear_memory_desc(bias->get_layout()) :
// element-wise multiply, with broadcast lookup for the scale index
for (int n = 0; n < in0_b; ++n)
for (int c = 0; c < in0_f; ++c)
for (int y = 0; y < in0_h; ++y)
for (int x = 0; x < in0_w; ++x)
const size_t in0_idx = get_linear_index(input.get_layout(), n, c, y, x, input_desc);
const size_t in1_idx = get_linear_index_with_broadcast(scale.get_layout(), n, c, y, x, scale_desc);
const size_t out_idx = get_linear_index(output.get_layout(), n, c, y, x, output_desc);
out_mem[out_idx] = in0_mem[in0_idx] * in1_mem[in1_idx];
// add the (broadcast) bias term when a bias input was supplied
const size_t in2_idx = get_linear_index_with_broadcast(bias->get_layout(), n, c, y, x, bias_desc);
out_mem[out_idx] += in2_mem[in2_idx];
1489 virtual memory generate_reference(const std::vector<memory> & inputs) override
1491 if (generic_params->data_type == data_types::f32)
1493 return generate_reference_typed<float>(inputs);
1497 return generate_reference_typed<FLOAT16>(inputs);
// Builds a human-readable gtest parameter name from the generic params:
// data type plus each input's dimensions (input / scale / optional bias),
// labeled with the layout's channel letters.
static std::string custom_param_name(const ::testing::TestParamInfo<std::tuple<test_params*, cldnn::primitive*>>& info)
std::stringstream res;
const auto & p = std::get<0>(info.param);
// only f32/f16 are generated by generate_generic_test_params
assert (p->data_type == data_types::f32 ||
p->data_type == data_types::f16);
<< "_" << (p->data_type == data_types::f32 ? "f32" : "f16");
for (unsigned i = 0; i < p->input_layouts.size(); ++i)
if (i == 0) res << "_Input";
if (i == 1) res << "_ScaleInput";
if (i == 2) res << "_BiasInput";
// the format's order string (e.g. "bfyx") provides one letter per dimension
const auto chans = format::traits(p->fmt).order;
for (unsigned int j = 0; j < p->input_layouts[i].size.sizes(p->fmt).size(); ++j)
res << chans[j] << p->input_layouts[i].size.sizes(p->fmt)[j];
// Static caches that own every parameter/primitive object created by
// generate_all_test_params(); released in TearDownTestCase().
static std::vector<std::unique_ptr<tests::test_params>> all_generic_params;
static std::vector<std::unique_ptr<cldnn::primitive>> all_layer_params;
// Out-of-class definitions for the fixture's static storage.
std::vector<std::unique_ptr<cldnn::primitive>> scale_test::all_layer_params = {};
std::vector<std::unique_ptr<tests::test_params>> scale_test::all_generic_params = {};
// Parameterized entry point: runs once per (test_params, primitive)
// combination registered by the INSTANTIATE_TEST_CASE_P below.
TEST_P(scale_test, SCALE)
// Registers the exhaustive parameter matrix. The DISABLED_ prefix follows the
// gtest convention that keeps these tests from running unless explicitly
// enabled (e.g. with --gtest_also_run_disabled_tests).
INSTANTIATE_TEST_CASE_P(DISABLED_SCALE,
::testing::ValuesIn(scale_test::generate_all_test_params()),
scale_test::custom_param_name);