2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include <api/CPP/engine.hpp>
21 #include <api/CPP/memory.hpp>
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/network.hpp>
24 #include <api/CPP/input_layout.hpp>
25 #include <api/CPP/activation.hpp>
26 #include <api/CPP/pooling.hpp>
27 #include <api/CPP/concatenation.hpp>
28 #include <api/CPP/data.hpp>
29 #include <api/CPP/reshape.hpp>
30 #include <api/CPP/crop.hpp>
31 #include <api/CPP/scale.hpp>
33 #include "test_utils/test_utils.h"
35 using namespace cldnn;
36 using namespace tests;
// Stress test (disabled by default): builds a minimal network (input ->
// linear activation) over a large 1x1x1000x1000 f32 buffer and feeds the
// same input into it. Intended for watching memory behaviour across
// repeated executions rather than validating outputs.
// NOTE(review): this excerpt elides several lines (engine/topology
// declarations, the execution loop, braces) -- confirm against the full file.
39 TEST(memory_tests, DISABLED_execution_loop)
43 memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } });
46 input_layout("in", in.get_layout()),
47 activation("out", "in", activation_linear)
50 network net(eng, tpl);
54 net.set_input_data("in", in);
// Stress test (disabled by default): repeatedly constructs a network from
// the same trivial topology (input -> linear activation) to watch for
// memory growth across network creations.
// NOTE(review): the creation loop and test braces are elided in this
// excerpt -- confirm against the full file.
59 TEST(memory_tests, DISABLED_network_creation_loop)
63 memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1, 1, 1000, 1000 } });
66 input_layout("in", in.get_layout()),
67 activation("out", "in", activation_linear)
72 network net(eng, tpl);
// Memory-pool test: a straight chain of six ReLU activations over a tiny
// non-padded input. After execution the peak device allocation is asserted
// to be exactly 80 bytes -- i.e. the intermediate ReLU buffers must have
// been reused from the pool rather than allocated per-primitive.
76 TEST(memory_pool, basic_non_padded_relu_pipe) {
77 // 5 relu's of size 1x4x1x1
// A fresh engine so the device-memory usage counter starts from zero for
// this test (per the original author's note below).
78 const cldnn::engine engine;// here we need new engine
84 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
87 topology.add(input_layout("input", input.get_layout()));
88 topology.add(activation("relu", "input", activation_relu));
89 topology.add(activation("relu1", "relu", activation_relu));
90 topology.add(activation("relu2", "relu1", activation_relu));
91 topology.add(activation("relu3", "relu2", activation_relu));
92 topology.add(activation("relu4", "relu3", activation_relu));
93 topology.add(activation("relu5", "relu4", activation_relu));
95 std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
96 set_values(input, input_vec);
// optimize_data(true) enables the graph optimizations that allow buffer
// sharing through the memory pool.
98 bo.set_option(build_option::optimize_data(true));
100 network network(engine, topology, bo);
101 network.set_input_data("input", input);
102 auto outputs = network.execute();
// Peak device memory for the whole pipeline must stay at 80 bytes.
104 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 80);
// Memory-pool test: same ReLU chain as above but with a max-pooling
// primitive (3x3 window, 2x2 stride) in the middle, which changes the
// intermediate buffer sizes. Peak device allocation is pinned at 1088 bytes.
108 TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
109 // uncomment this line to disable memory pool
110 /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
111 engine engine{ cfg };*/
// Fresh engine so the usage counter is local to this test.
112 const cldnn::engine engine;// here we need new engine
114 auto feature_num = 4;
118 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
121 topology.add(input_layout("input", input.get_layout()));
122 topology.add(activation("relu", "input", activation_relu));
123 topology.add(activation("relu1", "relu", activation_relu));
124 topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 }));
125 topology.add(activation("relu2", "pool1", activation_relu));
126 topology.add(activation("relu3", "relu2", activation_relu));
127 topology.add(activation("relu4", "relu3", activation_relu));
128 topology.add(activation("relu5", "relu4", activation_relu));
131 bo.set_option(build_option::optimize_data(true));
133 network network(engine, topology, bo);
134 network.set_input_data("input", input);
135 auto outputs = network.execute();
// Expected peak with pooling in the pipe; would be larger without pooling
// buffer reuse through the pool.
137 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1088);
// Memory-pool test with two parallel branches off the same input:
//   input -> relu  -> relu1 -> relu4
//   input -> relu2 -> relu3 -> relu5 -> relu6 -> relu7
// Verifies pool behaviour when buffers from one branch must not be shared
// with live buffers of the other (see the original ASCII note below).
// Peak device allocation is pinned at 2048 bytes.
141 TEST(memory_pool, multi_outputs_network) {
142 // -- relu -- relu1 -- relu4
144 // -- relu2 -- relu3 -- relu5--relu6--relu7
145 // neither of relu5, relu6 nor relu7 can share resource with relu4.
147 // uncomment this line to disable memory pool
148 /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
149 engine engine{ cfg };*/
// Fresh engine so the usage counter is local to this test.
150 const cldnn::engine engine;// here we need new engine
152 auto feature_num = 4;
156 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
159 topology.add(input_layout("input", input.get_layout()));
160 topology.add(activation("relu", "input", activation_relu));
161 topology.add(activation("relu1", "relu", activation_relu));
162 topology.add(activation("relu2", "input", activation_relu));
163 topology.add(activation("relu3", "relu2", activation_relu));
164 topology.add(activation("relu4", "relu1", activation_relu));
165 topology.add(activation("relu5", "relu3", activation_relu));
166 topology.add(activation("relu6", "relu5", activation_relu));
167 topology.add(activation("relu7", "relu6", activation_relu));
170 bo.set_option(build_option::optimize_data(true));
172 network network(engine, topology, bo);
173 network.set_input_data("input", input);
174 auto outputs = network.execute();
176 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)2048);
// Memory-pool test under an out-of-order queue: engine is configured
// explicitly with oooq=true and mem_pool=true (see the /*oooq*/ and
// /*mem_pool*/ markers in the config below). The diamond-shaped graph of
// relus and concatenations must keep peak device allocation at 2816 bytes
// even though primitives may execute out of order.
180 TEST(memory_pool, oooq) {
181 /* -- relu1 - concat1- relu4 --
182 input< -- relu2 / >-- concat2 -- relu6
183 -- relu3 -- relu5 ---------
184 neither of relu5, relu6 nor relu7 can share resource with relu4. */
186 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
187 engine engine{ cfg };
189 auto feature_num = 4;
193 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
196 topology.add(input_layout("input", input.get_layout()));
197 topology.add(activation("relu1", "input", activation_relu));
198 topology.add(activation("relu2", "input", activation_relu));
199 topology.add(activation("relu3", "input", activation_relu));
200 topology.add(concatenation("concat1", { "relu1", "relu2"},concatenation::along_f));
201 topology.add(activation("relu4", "concat1", activation_relu));
202 topology.add(activation("relu5", "relu3", activation_relu));
203 topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
204 topology.add(activation("relu6", "concat2", activation_relu));
207 bo.set_option(build_option::optimize_data(true));
209 network network(engine, topology, bo);
210 network.set_input_data("input", input);
211 auto outputs = network.execute();
213 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
// Builds the same diamond topology into TWO networks on one engine and
// checks (a) peak memory after the first run is 2816 bytes, (b) peak grows
// only to 3584 bytes once a second network coexists with the first (pool
// sharing across networks), and (c) both networks produce element-wise
// identical "relu6" outputs, i.e. buffer sharing does not corrupt results.
216 TEST(memory_pool, shared_mem_pool_same_topology_twice) {
217 /* -- relu1 - concat1- relu4 --
218 input< -- relu2 | >-- concat2 -- relu6
219 -- relu3 -- relu5 ---------
220 neither of relu5, relu6 nor relu7 can share resource with relu4. */
222 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
223 engine engine{ cfg };
225 auto feature_num = 4;
229 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });
232 { 1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f,
233 5.0f, 2.0f, 2.0f, 3.0f, 5.0f, 2.0f, 2.0f, 3.0f, 1.1f, 2.4f, 1.0f, 1.0f, 4.0f, 6.0f, 3.0f, 3.6f,
234 4.0f, 6.0f, 3.0f, 3.0f, 1.0f, 1.0f, 1.5f, 1.0f, 4.0f, 6.5f, 3.0f, 3.0f, 4.0f, 6.0f, 1.8f, 3.5f,
235 3.0f, 5.0f, 1.0f, 1.0f, 1.3f, 1.0f, 0.4f, 1.3f, 4.0f, 7.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.9f, 4.0f
239 topology.add(input_layout("input", input.get_layout()));
240 topology.add(activation("relu1", "input", activation_relu));
241 topology.add(activation("relu2", "input", activation_sqrt));
242 topology.add(activation("relu3", "input", activation_square));
243 topology.add(concatenation("concat1", { "relu1", "relu2" }, concatenation::along_f));
244 topology.add(activation("relu4", "concat1", activation_relu));
245 topology.add(activation("relu5", "relu3", activation_relu));
246 topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
247 topology.add(activation("relu6", "concat2", activation_linear, {1.0f, 0.5f}));
250 bo.set_option(build_option::optimize_data(true));
// First network: establish the baseline peak allocation.
252 network network_first(engine, topology, bo);
253 network_first.set_input_data("input", input);
254 auto outputs = network_first.execute();
256 auto output_memory_first = outputs.at("relu6").get_memory();
257 auto output_layout_first = output_memory_first.get_layout();
258 auto output_ptr_first = output_memory_first.pointer<float>();
260 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
// Second network from the same topology while the first is still alive:
// the pool lets it reuse most buffers, so the peak rises only to 3584.
262 network network_second(engine, topology, bo);
263 network_second.set_input_data("input", input);
264 auto outputs_second = network_second.execute();
266 auto output_memory_second = outputs_second.at("relu6").get_memory();
267 auto output_layout_second = output_memory_second.get_layout();
268 auto output_ptr_second = output_memory_second.pointer<float>();
270 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3584);
271 EXPECT_EQ(output_layout_first, output_layout_second);
// Compare the two runs element by element in bfyx order.
273 int y_size = output_layout_first.size.spatial[1];
274 int x_size = output_layout_first.size.spatial[0];
275 int f_size = output_layout_first.size.feature[0];
276 int b_size = output_layout_first.size.batch[0];
277 int f_offset = y_size*x_size;
278 int b_offset = f_size * f_offset;
279 for (int b = 0; b < b_size; ++b)
281 for (int f = 0; f < f_size; ++f)
283 for (int y = 0; y < y_size; ++y)
285 for (int x = 0; x < x_size; ++x)
287 int idx = b * b_offset + f * f_offset + y * x_size + x;
288 EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
// Same "two networks from one topology" check as above, but with a
// weighted primitive (convolution + softmax) so the shared "weights" data
// memory is also in play. Peak allocation: 824 bytes after the first run,
// 1224 bytes once the second network coexists; softmax outputs of the two
// runs must match element-wise.
295 TEST(memory_pool, shared_mem_pool_same_topology_twice_weights) {
297 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
298 engine engine{ cfg };
300 auto feature_num = 3;
304 auto input= memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });
305 auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } });
307 std::vector<float> dummy_input_data_1 = {
308 /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f,
309 /*f1 xy*/ 0.48f, 0.05f, 0.35f, 1.0f, 1.0f, 0.51f, 0.51f, 0.13f, 0.86f, 0.10f, 0.29f, 0.53f, 0.99f, 0.4f, 0.3f, 0.1f,
310 /*f2 xy*/ 0.98f, 0.35f, 0.3f, 0.01f, 0.9f, 0.55f, 0.15f, 0.39f, 0.36f, 0.01f, 0.32f, 0.4f, 0.3f, 0.2f, 0.1f, 0.5f,
313 set_values(input, dummy_input_data_1);
314 set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });
317 input_layout("input", input.get_layout()),
318 data("weights", weights),
319 convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
320 softmax("softmax", "conv"));
323 bo.set_option(build_option::optimize_data(true));
// First network: baseline peak.
325 network network_first(engine, topology, bo);
326 network_first.set_input_data("input", input);
327 auto outputs = network_first.execute();
329 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)824);
331 auto output_memory_first = outputs.at("softmax").get_memory();
332 auto output_layout_first = output_memory_first.get_layout();
333 auto output_ptr_first = output_memory_first.pointer<float>();
// Second network on the same engine: peak rises only to 1224 thanks to
// pool reuse across networks.
335 network network_second(engine, topology, bo);
336 network_second.set_input_data("input", input);
337 auto outputs_second = network_second.execute();
339 auto output_memory_second = outputs_second.at("softmax").get_memory();
340 auto output_layout_second = output_memory_second.get_layout();
341 auto output_ptr_second = output_memory_second.pointer<float>();
343 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1224);
344 EXPECT_EQ(output_layout_first, output_layout_second);
// Element-wise comparison of the two runs in bfyx order.
346 int y_size = output_layout_first.size.spatial[1];
347 int x_size = output_layout_first.size.spatial[0];
348 int f_size = output_layout_first.size.feature[0];
349 int b_size = output_layout_first.size.batch[0];
350 int f_offset = y_size * x_size;
351 int b_offset = f_size * f_offset;
352 for (int b = 0; b < b_size; ++b)
354 for (int f = 0; f < f_size; ++f)
356 for (int y = 0; y < y_size; ++y)
358 for (int x = 0; x < x_size; ++x)
360 int idx = b * b_offset + f * f_offset + y * x_size + x;
361 EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
// Runs the conv+softmax topology first with batch=8, then rebuilds it with
// batch=1 via change_input_layout. Both EXPECTs pin the peak at the same
// 3928 bytes: the smaller batch-1 network must fit entirely inside memory
// the pool already allocated for the batch-8 run (no additional growth).
369 TEST(memory_pool, shared_mem_pool_diff_batches) {
371 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
372 engine engine{ cfg };
375 auto feature_num = 3;
378 auto dt = data_types::f32;
379 auto fmt = format::bfyx;
380 layout lay_batch_1 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) }};
381 layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }};
382 auto input_1 = memory::allocate(engine, lay_batch_1);
383 auto input_8 = memory::allocate(engine, lay_batch_8);
384 auto weights = memory::allocate(engine, { dt, fmt, { 1, 1, 3, 2 } });
// Random inputs: only memory accounting is checked here, not output values.
386 std::vector<float> dummy_input_data_1 = generate_random_1d<float>(batch_1*feature_num*inp_x_size*inp_y_size, 0, 1);
387 std::vector<float> dummy_input_data_8 = generate_random_1d<float>(batch_8*feature_num*inp_x_size*inp_y_size, 0, 1);
389 set_values(input_1, dummy_input_data_1);
390 set_values(input_8, dummy_input_data_8);
391 set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });
394 input_layout("input", input_8.get_layout()),
395 data("weights", weights),
396 convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
397 softmax("softmax", "conv"));
400 bo.set_option(build_option::optimize_data(true));
// Batch-8 run establishes the peak.
402 network network_first(engine, topo, bo);
403 network_first.set_input_data("input", input_8);
404 auto outputs = network_first.execute();
406 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);
408 topo.change_input_layout("input", input_1.get_layout());//change input layout to batch=1
// Batch-1 run must not allocate beyond the batch-8 peak.
410 network network_second(engine, topo, bo);
411 network_second.set_input_data("input", input_1);
412 auto outputs_second = network_second.execute();
414 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);
// Memory-pool test where two concatenation outputs share a dependency (a
// single constant data primitive feeds both concatenations). Asserts the
// peak device allocation is 256 bytes.
// NOTE(review): the arguments of the data/concatenation constructors are
// elided in this excerpt -- confirm the exact graph in the full file.
417 TEST(memory_pool, shared_dep_two_output) {
419 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
420 engine engine{ cfg };
422 auto feature_num = 1;
425 auto dt = data_types::f32;
426 auto fmt = format::bfyx;
427 layout lay_batch_1 = { dt, fmt,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } };
428 auto input_1 = memory::allocate(engine, lay_batch_1);
429 set_random_values<float>(input_1);
// Shared constant feeding both result_1_0 and result_2_0.
432 auto constant_0_0 = cldnn::data(
436 auto result_1_0 = cldnn::concatenation(
439 cldnn::concatenation::along_b
441 auto result_2_0 = cldnn::concatenation(
444 cldnn::concatenation::along_b
447 //build and execute network
449 topo.add(constant_0_0);
450 topo.add(result_1_0);
451 topo.add(result_2_0);
454 bo.set_option(build_option::optimize_data(true));
456 network network(engine, topo, bo);
457 auto outputs = network.execute();
458 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)256);
// Correctness test ("intermidate" typo is part of the public test name --
// kept for test-filter compatibility): concatenates two 1x1x2x2 inputs
// along batch, reshapes to 8x1x1x1, crops single elements at batch offsets
// 0 and 1, and scales each by 1.0. With optimize_data(false) (no graph
// optimization of the intermediates), the two outputs must still read the
// first two elements of the first input: 1.0 and 2.0.
461 TEST(memory_pool, non_opt_intermidate_opt_after) {
463 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
464 engine engine{ cfg };
465 auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
466 auto input_layout2 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
468 auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
469 auto input_memory2 = cldnn::memory::allocate(engine, input_layout2);
// Scale factor of 1.0 -> the scale primitives are numerically identity.
470 auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
471 auto data_memory = cldnn::data("scale_mem", scale_memory);
473 set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f });
474 set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f });
475 set_values(scale_memory, { 1.0f});
477 auto reshape_tensor = cldnn::tensor(8, 1, 1, 1);
478 auto input = cldnn::input_layout("input1", input_layout1);
479 auto input2 = cldnn::input_layout("input2", input_layout2);
480 auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b);
481 auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor);
// Crops pick out batch elements 0 and 1 of the reshaped buffer.
482 auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 });
483 auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 });
484 auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
485 auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
487 auto topology = cldnn::topology(
// Optimization deliberately disabled: intermediates stay un-optimized.
497 bo.set_option(build_option::optimize_data(false));
498 network network(engine, topology, bo);
499 network.set_input_data("input1", input_memory1);
500 network.set_input_data("input2", input_memory2);
501 auto outputs = network.execute();
502 EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
504 auto out1 = outputs.at("elt1");
505 auto out2 = outputs.at("elt2");
507 auto out1_ptr = out1.get_memory().pointer<float>();
508 auto out2_ptr = out2.get_memory().pointer<float>();
509 EXPECT_EQ(out1_ptr[0], 1.0f);
510 EXPECT_EQ(out2_ptr[0], 2.0f);
// Correctness test for memory dependencies: a 1x2x2x2 input feeds two
// abs-activation branches; each branch crops one feature slice (offsets
// {0,0,0,0} and {0,1,0,0}), scales it by 1.0 (identity), and applies abs
// again. The outputs must therefore reproduce the two halves of the input
// exactly: {1..4} on "out3" and {5..8} on "out4" -- i.e. sharing pooled
// buffers between the branches must not corrupt either path.
// NOTE(review): the test's tail (closing brace) lies beyond this excerpt.
513 TEST(memory_pool, add_mem_dep_test) {
515 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
516 engine engine{ cfg };
517 auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 });
519 auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
// Scale factor of 1.0 -> the scale primitives are numerically identity.
520 auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
521 auto data_memory = cldnn::data("scale_mem", scale_memory);
523 set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f,
524 5.0f, 6.0f, 7.0f, 8.0f});
525 set_values(scale_memory, { 1.0f });
528 auto input = cldnn::input_layout("input1", input_layout1);
529 auto actv1 = cldnn::activation("input_activ1", "input1", cldnn_activation_func::activation_abs);
530 auto actv2 = cldnn::activation("input_activ2", "input1", cldnn_activation_func::activation_abs);
// crop1 takes feature 0, crop2 takes feature 1 (offset {0,1,0,0}).
531 auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 });
532 auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 });
533 auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
534 auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
535 auto actv3 = cldnn::activation("out3", "elt1", cldnn_activation_func::activation_abs);
536 auto actv4 = cldnn::activation("out4", "elt2", cldnn_activation_func::activation_abs);
538 auto topology = cldnn::topology(
548 bo.set_option(build_option::optimize_data(true));
549 network network(engine, topology, bo);
550 network.set_input_data("input1", input_memory1);
551 auto outputs = network.execute();
552 EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
554 auto out1 = outputs.at("out3");
555 auto out2 = outputs.at("out4");
557 auto out1_ptr = out1.get_memory().pointer<float>();
558 auto out2_ptr = out2.get_memory().pointer<float>();
// Branch 1 must see feature slice 0 of the input, untouched.
559 EXPECT_EQ(out1_ptr[0], 1.0f);
560 EXPECT_EQ(out1_ptr[1], 2.0f);
561 EXPECT_EQ(out1_ptr[2], 3.0f);
562 EXPECT_EQ(out1_ptr[3], 4.0f);
// Branch 2 must see feature slice 1 of the input, untouched.
564 EXPECT_EQ(out2_ptr[0], 5.0f);
565 EXPECT_EQ(out2_ptr[1], 6.0f);
566 EXPECT_EQ(out2_ptr[2], 7.0f);
567 EXPECT_EQ(out2_ptr[3], 8.0f);