/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

///////////////////////////////////////////////////////////////////////////////////////////////////

#include <gtest/gtest.h>
#include <api/CPP/engine.hpp>
#include <api/CPP/memory.hpp>
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/input_layout.hpp>
#include <api/CPP/activation.hpp>
#include <api/CPP/pooling.hpp>
#include <api/CPP/concatenation.hpp>
#include <api/CPP/data.hpp>
#include <api/CPP/reshape.hpp>
#include <api/CPP/crop.hpp>
#include <api/CPP/scale.hpp>
#include <api/CPP/convolution.hpp>  // used by the convolution/softmax tests below
#include <api/CPP/softmax.hpp>

#include "test_utils/test_utils.h"

using namespace cldnn;
using namespace tests;

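// A small helper (an illustrative addition, not part of the original tests)
// documenting the arithmetic behind the byte counts asserted below, assuming
// dense, non-padded bfyx f32 buffers:
constexpr uint64_t bfyx_f32_bytes(uint64_t b, uint64_t f, uint64_t y, uint64_t x)
{
    return b * f * y * x * sizeof(float); // e.g. 1x4x1x1 -> 16 bytes
}
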
#if 0
TEST(memory_tests, DISABLED_execution_loop)
{
    engine eng;

    memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } });

    topology tpl{
        input_layout("in", in.get_layout()),
        activation("out", "in", activation_linear)
    };

    network net(eng, tpl);

    while (true)
    {
        net.set_input_data("in", in);
        net.execute();
    }
}

TEST(memory_tests, DISABLED_network_creation_loop)
{
    engine eng;

    memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } });

    topology tpl{
        input_layout("in", in.get_layout()),
        activation("out", "in", activation_linear)
    };

    while (true)
    {
        network net(eng, tpl);
    }
}
#endif
TEST(memory_pool, basic_non_padded_relu_pipe) {
    // a chain of six ReLUs, each of size 1x4x1x1
    const cldnn::engine engine; // fresh engine, so the memory counter starts at zero
    auto batch_num = 1;
    auto feature_num = 4;
    auto x_size = 1;
    auto y_size = 1;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu", "input", activation_relu));
    topology.add(activation("relu1", "relu", activation_relu));
    topology.add(activation("relu2", "relu1", activation_relu));
    topology.add(activation("relu3", "relu2", activation_relu));
    topology.add(activation("relu4", "relu3", activation_relu));
    topology.add(activation("relu5", "relu4", activation_relu));

    std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
    set_values(input, input_vec);
    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network(engine, topology, bo);
    network.set_input_data("input", input);
    auto outputs = network.execute();

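    // A sketch of the arithmetic, assuming dense f32 bfyx buffers: one
    // 1x4x1x1 buffer is bfyx_f32_bytes(1, 4, 1, 1) = 16 B, so 80 B means
    // five live buffers - the six chained ReLUs reuse pooled memory instead
    // of allocating seven distinct buffers.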
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 80);
}


TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
    // uncomment these lines to disable the memory pool
    /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(), std::string(), 0, false };
    engine engine{ cfg };*/
    const cldnn::engine engine; // fresh engine, so the memory counter starts at zero
    auto batch_num = 1;
    auto feature_num = 4;
    auto x_size = 4;
    auto y_size = 4;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu", "input", activation_relu));
    topology.add(activation("relu1", "relu", activation_relu));
    topology.add(pooling("pool1", "relu1", pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 }));
    topology.add(activation("relu2", "pool1", activation_relu));
    topology.add(activation("relu3", "relu2", activation_relu));
    topology.add(activation("relu4", "relu3", activation_relu));
    topology.add(activation("relu5", "relu4", activation_relu));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network(engine, topology, bo);
    network.set_input_data("input", input);
    auto outputs = network.execute();

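    // Hedged arithmetic: a 1x4x4x4 f32 buffer is 256 B, and the 3x3/stride-2
    // max pooling shrinks the spatial size to 2x2 (64 B); 1088 B is thus
    // consistent with four full-size buffers plus one pooled-size buffer.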
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1088);
}


TEST(memory_pool, multi_outputs_network) {
    //            -- relu -- relu1 -- relu4
    //     input<
    //            -- relu2 -- relu3 -- relu5 -- relu6 -- relu7
    // none of relu5, relu6, or relu7 can share resources with relu4.

    // uncomment these lines to disable the memory pool
    /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(), std::string(), 0, false };
    engine engine{ cfg };*/
    const cldnn::engine engine; // fresh engine, so the memory counter starts at zero
    auto batch_num = 1;
    auto feature_num = 4;
    auto x_size = 4;
    auto y_size = 4;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu", "input", activation_relu));
    topology.add(activation("relu1", "relu", activation_relu));
    topology.add(activation("relu2", "input", activation_relu));
    topology.add(activation("relu3", "relu2", activation_relu));
    topology.add(activation("relu4", "relu1", activation_relu));
    topology.add(activation("relu5", "relu3", activation_relu));
    topology.add(activation("relu6", "relu5", activation_relu));
    topology.add(activation("relu7", "relu6", activation_relu));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network(engine, topology, bo);
    network.set_input_data("input", input);
    auto outputs = network.execute();

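    // Hedged arithmetic: each 1x4x4x4 buffer is 256 B, and 2048 B corresponds
    // to eight such buffers; the two live output branches leave fewer buffers
    // free for recycling than a single chain would.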
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)2048);
}


TEST(memory_pool, oooq) {
    /*          -- relu1 -- concat1 -- relu4 --
        input<  -- relu2 /                     >-- concat2 -- relu6
                -- relu3 --  relu5 -----------
       relu5 and relu6 cannot share resources with relu4. */

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto batch_num = 1;
    auto feature_num = 4;
    auto x_size = 4;
    auto y_size = 4;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu1", "input", activation_relu));
    topology.add(activation("relu2", "input", activation_relu));
    topology.add(activation("relu3", "input", activation_relu));
    topology.add(concatenation("concat1", { "relu1", "relu2" }, concatenation::along_f));
    topology.add(activation("relu4", "concat1", activation_relu));
    topology.add(activation("relu5", "relu3", activation_relu));
    topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
    topology.add(activation("relu6", "concat2", activation_relu));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network(engine, topology, bo);
    network.set_input_data("input", input);
    auto outputs = network.execute();

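    // Hedged arithmetic: relu outputs are 256 B, concat1/relu4 512 B and
    // concat2/relu6 768 B; 2816 B is the equivalent of eleven 256 B buffers,
    // well under the roughly 3840 B that one allocation per primitive would need.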
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
}

TEST(memory_pool, shared_mem_pool_same_topology_twice) {
    /*          -- relu1 -- concat1 -- relu4 --
        input<  -- relu2 /                     >-- concat2 -- relu6
                -- relu3 --  relu5 -----------
       relu5 and relu6 cannot share resources with relu4. */

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto batch_num = 1;
    auto feature_num = 4;
    auto inp_x_size = 4;
    auto inp_y_size = 4;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });

    set_values(input,
    {   1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f,
        5.0f, 2.0f, 2.0f, 3.0f, 5.0f, 2.0f, 2.0f, 3.0f, 1.1f, 2.4f, 1.0f, 1.0f, 4.0f, 6.0f, 3.0f, 3.6f,
        4.0f, 6.0f, 3.0f, 3.0f, 1.0f, 1.0f, 1.5f, 1.0f, 4.0f, 6.5f, 3.0f, 3.0f, 4.0f, 6.0f, 1.8f, 3.5f,
        3.0f, 5.0f, 1.0f, 1.0f, 1.3f, 1.0f, 0.4f, 1.3f, 4.0f, 7.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.9f, 4.0f
    });

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu1", "input", activation_relu));
    topology.add(activation("relu2", "input", activation_sqrt));
    topology.add(activation("relu3", "input", activation_square));
    topology.add(concatenation("concat1", { "relu1", "relu2" }, concatenation::along_f));
    topology.add(activation("relu4", "concat1", activation_relu));
    topology.add(activation("relu5", "relu3", activation_relu));
    topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
    topology.add(activation("relu6", "concat2", activation_linear, {1.0f, 0.5f}));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network_first(engine, topology, bo);
    network_first.set_input_data("input", input);
    auto outputs = network_first.execute();

    auto output_memory_first = outputs.at("relu6").get_memory();
    auto output_layout_first = output_memory_first.get_layout();
    auto output_ptr_first = output_memory_first.pointer<float>();

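    // First run: the same 2816 B high-water mark as the oooq test above.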
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);

    network network_second(engine, topology, bo);
    network_second.set_input_data("input", input);
    auto outputs_second = network_second.execute();

    auto output_memory_second = outputs_second.at("relu6").get_memory();
    auto output_layout_second = output_memory_second.get_layout();
    auto output_ptr_second = output_memory_second.pointer<float>();

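    // The second network runs on the same engine and reuses the existing
    // pool: only 768 B (presumably one concat2/relu6-sized buffer) is added
    // on top of the first run's 2816 B.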
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3584);
    EXPECT_EQ(output_layout_first, output_layout_second);

    int y_size = output_layout_first.size.spatial[1];
    int x_size = output_layout_first.size.spatial[0];
    int f_size = output_layout_first.size.feature[0];
    int b_size = output_layout_first.size.batch[0];
    int f_offset = y_size * x_size;
    int b_offset = f_size * f_offset;
    for (int b = 0; b < b_size; ++b)
    {
        for (int f = 0; f < f_size; ++f)
        {
            for (int y = 0; y < y_size; ++y)
            {
                for (int x = 0; x < x_size; ++x)
                {
                    int idx = b * b_offset + f * f_offset + y * x_size + x;
                    EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
                }
            }
        }
    }
}

TEST(memory_pool, shared_mem_pool_same_topology_twice_weights) {

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto batch_num = 1;
    auto feature_num = 3;
    auto inp_x_size = 4;
    auto inp_y_size = 4;

    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });
    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } });

    std::vector<float> dummy_input_data_1 = {
       /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f,
       /*f1 xy*/ 0.48f, 0.05f, 0.35f, 1.0f, 1.0f, 0.51f, 0.51f, 0.13f, 0.86f, 0.10f, 0.29f, 0.53f, 0.99f, 0.4f, 0.3f, 0.1f,
       /*f2 xy*/ 0.98f, 0.35f, 0.3f, 0.01f, 0.9f, 0.55f, 0.15f, 0.39f, 0.36f, 0.01f, 0.32f, 0.4f, 0.3f, 0.2f, 0.1f, 0.5f,
    };

    set_values(input, dummy_input_data_1);
    set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });

    topology topology(
        input_layout("input", input.get_layout()),
        data("weights", weights),
        convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
        softmax("softmax", "conv"));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network_first(engine, topology, bo);
    network_first.set_input_data("input", input);
    auto outputs = network_first.execute();

    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)824);

    auto output_memory_first = outputs.at("softmax").get_memory();
    auto output_layout_first = output_memory_first.get_layout();
    auto output_ptr_first = output_memory_first.pointer<float>();

    network network_second(engine, topology, bo);
    network_second.set_input_data("input", input);
    auto outputs_second = network_second.execute();

    auto output_memory_second = outputs_second.at("softmax").get_memory();
    auto output_layout_second = output_memory_second.get_layout();
    auto output_ptr_second = output_memory_second.pointer<float>();

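    // Building the second network on the same engine grows the pool by only
    // 400 B (1224 - 824); the rest of the first network's buffers are reused.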
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1224);
    EXPECT_EQ(output_layout_first, output_layout_second);

    int y_size = output_layout_first.size.spatial[1];
    int x_size = output_layout_first.size.spatial[0];
    int f_size = output_layout_first.size.feature[0];
    int b_size = output_layout_first.size.batch[0];
    int f_offset = y_size * x_size;
    int b_offset = f_size * f_offset;
    for (int b = 0; b < b_size; ++b)
    {
        for (int f = 0; f < f_size; ++f)
        {
            for (int y = 0; y < y_size; ++y)
            {
                for (int x = 0; x < x_size; ++x)
                {
                    int idx = b * b_offset + f * f_offset + y * x_size + x;
                    EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
                }
            }
        }
    }
}


TEST(memory_pool, shared_mem_pool_diff_batches) {

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto batch_8 = 8;
    auto batch_1 = 1;
    auto feature_num = 3;
    auto inp_x_size = 4;
    auto inp_y_size = 4;
    auto dt = data_types::f32;
    auto fmt = format::bfyx;
    layout lay_batch_1 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } };
    layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) } };
    auto input_1 = memory::allocate(engine, lay_batch_1);
    auto input_8 = memory::allocate(engine, lay_batch_8);
    auto weights = memory::allocate(engine, { dt, fmt, { 1, 1, 3, 2 } });

    std::vector<float> dummy_input_data_1 = generate_random_1d<float>(batch_1 * feature_num * inp_x_size * inp_y_size, 0, 1);
    std::vector<float> dummy_input_data_8 = generate_random_1d<float>(batch_8 * feature_num * inp_x_size * inp_y_size, 0, 1);

    set_values(input_1, dummy_input_data_1);
    set_values(input_8, dummy_input_data_8);
    set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });

    topology topo(
        input_layout("input", input_8.get_layout()),
        data("weights", weights),
        convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
        softmax("softmax", "conv"));

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network_first(engine, topo, bo);
    network_first.set_input_data("input", input_8);
    auto outputs = network_first.execute();

    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);

    topo.change_input_layout("input", input_1.get_layout()); // change the input layout to batch = 1

    network network_second(engine, topo, bo);
    network_second.set_input_data("input", input_1);
    auto outputs_second = network_second.execute();

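    // The batch-1 network's buffers all fit inside allocations already made
    // for the batch-8 network, so the pool's high-water mark does not grow.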
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);
}

TEST(memory_pool, shared_dep_two_output) {

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto batch_1 = 1;
    auto feature_num = 1;
    auto inp_x_size = 4;
    auto inp_y_size = 4;
    auto dt = data_types::f32;
    auto fmt = format::bfyx;
    layout lay_batch_1 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } };
    auto input_1 = memory::allocate(engine, lay_batch_1);
    set_random_values<float>(input_1);

    // build primitives
    auto constant_0_0 = cldnn::data(
        "constant_0_0",
        input_1
    );
    auto result_1_0 = cldnn::concatenation(
        "result_1_0",
        { constant_0_0 },
        cldnn::concatenation::along_b
    );
    auto result_2_0 = cldnn::concatenation(
        "result_2_0",
        { constant_0_0 },
        cldnn::concatenation::along_b
    );

    // build and execute network
    topology topo;
    topo.add(constant_0_0);
    topo.add(result_1_0);
    topo.add(result_2_0);

    build_options bo;
    bo.set_option(build_option::optimize_data(true));

    network network(engine, topo, bo);
    auto outputs = network.execute();
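    // Hedged arithmetic: a 1x1x4x4 f32 buffer is 64 B, so 256 B corresponds
    // to four such buffers even though both concatenations read the same
    // constant.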
    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)256);
}

TEST(memory_pool, non_opt_intermidate_opt_after) {

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
    auto input_layout2 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });

    auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
    auto input_memory2 = cldnn::memory::allocate(engine, input_layout2);
    auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 }));
    auto data_memory = cldnn::data("scale_mem", scale_memory);

    set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f });
    set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f });
    set_values(scale_memory, { 1.0f });

    auto reshape_tensor = cldnn::tensor(8, 1, 1, 1);
    auto input = cldnn::input_layout("input1", input_layout1);
    auto input2 = cldnn::input_layout("input2", input_layout2);
    auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b);
    auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor);
    auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 });
    auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 });
    auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
    auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");

    auto topology = cldnn::topology(
        input, input2,
        concat,
        reshape,
        crop1, crop2,
        eltwise1, eltwise2,
        data_memory
    );

    build_options bo;
    bo.set_option(build_option::optimize_data(false));
    network network(engine, topology, bo);
    network.set_input_data("input1", input_memory1);
    network.set_input_data("input2", input_memory2);
    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), static_cast<size_t>(2));

    auto out1 = outputs.at("elt1");
    auto out2 = outputs.at("elt2");

    auto out1_ptr = out1.get_memory().pointer<float>();
    auto out2_ptr = out2.get_memory().pointer<float>();
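    // concat along b stacks input1's values ahead of input2's; after the
    // reshape to 8x1x1x1, crop1 reads element 0 (1.0f) and crop2 reads
    // element 1 (2.0f), each scaled by 1.0f.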
    EXPECT_EQ(out1_ptr[0], 1.0f);
    EXPECT_EQ(out2_ptr[0], 2.0f);
}

TEST(memory_pool, add_mem_dep_test) {

    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(), std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
    engine engine{ cfg };
    auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 });

    auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
    auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 }));
    auto data_memory = cldnn::data("scale_mem", scale_memory);

    set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f,
        5.0f, 6.0f, 7.0f, 8.0f });
    set_values(scale_memory, { 1.0f });

    auto input = cldnn::input_layout("input1", input_layout1);
    auto actv1 = cldnn::activation("input_activ1", "input1", cldnn_activation_func::activation_abs);
    auto actv2 = cldnn::activation("input_activ2", "input1", cldnn_activation_func::activation_abs);
    auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 });
    auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 });
    auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
    auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
    auto actv3 = cldnn::activation("out3", "elt1", cldnn_activation_func::activation_abs);
    auto actv4 = cldnn::activation("out4", "elt2", cldnn_activation_func::activation_abs);

    auto topology = cldnn::topology(
        input,
        crop1, crop2,
        actv1, actv2,
        eltwise1, eltwise2,
        data_memory,
        actv3, actv4
    );

    build_options bo;
    bo.set_option(build_option::optimize_data(true));
    network network(engine, topology, bo);
    network.set_input_data("input1", input_memory1);
    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), static_cast<size_t>(2));

    auto out1 = outputs.at("out3");
    auto out2 = outputs.at("out4");

    auto out1_ptr = out1.get_memory().pointer<float>();
    auto out2_ptr = out2.get_memory().pointer<float>();
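    // crop1 takes feature 0 of the 1x2x2x2 input (values 1-4) and crop2
    // takes feature 1 (values 5-8); the abs activations leave these positive
    // values unchanged.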
    EXPECT_EQ(out1_ptr[0], 1.0f);
    EXPECT_EQ(out1_ptr[1], 2.0f);
    EXPECT_EQ(out1_ptr[2], 3.0f);
    EXPECT_EQ(out1_ptr[3], 4.0f);

    EXPECT_EQ(out2_ptr[0], 5.0f);
    EXPECT_EQ(out2_ptr[1], 6.0f);
    EXPECT_EQ(out2_ptr[2], 7.0f);
    EXPECT_EQ(out2_ptr[3], 8.0f);
}