2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include <api/CPP/engine.hpp>
21 #include <api/CPP/memory.hpp>
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/network.hpp>
24 #include <api/CPP/input_layout.hpp>
25 #include <api/CPP/activation.hpp>
26 #include <api/CPP/pooling.hpp>
27 #include <api/CPP/concatenation.hpp>
28 #include <api/CPP/data.hpp>
29 #include <api/CPP/reshape.hpp>
30 #include <api/CPP/crop.hpp>
31 #include <api/CPP/scale.hpp>
33 #include "test_utils/test_utils.h"
35 using namespace cldnn;
36 using namespace tests;
// Stress test (disabled by default): builds a minimal network (input ->
// linear activation) over a large 1x1x1000x1000 f32 buffer and feeds the
// same input into it. Intended for watching memory behaviour across
// repeated executions rather than validating outputs.
// NOTE(review): this excerpt elides several lines (engine/topology
// declarations, the execution loop, braces) -- confirm against the full file.
39 TEST(memory_tests, DISABLED_execution_loop)
43 memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } });
46 input_layout("in", in.get_layout()),
47 activation("out", "in", activation_linear)
50 network net(eng, tpl);
54 net.set_input_data("in", in);
// Stress test (disabled by default): repeatedly constructs a network from
// the same trivial topology (input -> linear activation) to watch for
// memory growth across network creations.
// NOTE(review): the creation loop and test braces are elided in this
// excerpt -- confirm against the full file.
59 TEST(memory_tests, DISABLED_network_creation_loop)
63 memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1, 1, 1000, 1000 } });
66 input_layout("in", in.get_layout()),
67 activation("out", "in", activation_linear)
72 network net(eng, tpl);
// Memory-pool test: a straight chain of six ReLU activations over a tiny
// non-padded input. After execution the peak device allocation is asserted
// to be exactly 80 bytes -- i.e. the intermediate ReLU buffers must have
// been reused from the pool rather than allocated per-primitive.
76 TEST(memory_pool, basic_non_padded_relu_pipe) {
77 // 5 relu's of size 1x4x1x1
// A fresh engine so the device-memory usage counter starts from zero for
// this test (per the original author's note below).
78 const cldnn::engine engine;// here we need new engine
84 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
87 topology.add(input_layout("input", input.get_layout()));
88 topology.add(activation("relu", "input", activation_relu));
89 topology.add(activation("relu1", "relu", activation_relu));
90 topology.add(activation("relu2", "relu1", activation_relu));
91 topology.add(activation("relu3", "relu2", activation_relu));
92 topology.add(activation("relu4", "relu3", activation_relu));
93 topology.add(activation("relu5", "relu4", activation_relu));
95 std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
96 set_values(input, input_vec);
// optimize_data(true) enables the graph optimizations that allow buffer
// sharing through the memory pool.
98 bo.set_option(build_option::optimize_data(true));
100 network network(engine, topology, bo);
101 network.set_input_data("input", input);
102 auto outputs = network.execute();
// Peak device memory for the whole pipeline must stay at 80 bytes.
104 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 80);
// Memory-pool test: same ReLU chain as above but with a max-pooling
// primitive (3x3 window, 2x2 stride) in the middle, which changes the
// intermediate buffer sizes. Peak device allocation is pinned at 1088 bytes.
108 TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
109 // uncomment this line to disable memory pool
110 /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
111 engine engine{ cfg };*/
// Fresh engine so the usage counter is local to this test.
112 const cldnn::engine engine;// here we need new engine
114 auto feature_num = 4;
118 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
121 topology.add(input_layout("input", input.get_layout()));
122 topology.add(activation("relu", "input", activation_relu));
123 topology.add(activation("relu1", "relu", activation_relu));
124 topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 }));
125 topology.add(activation("relu2", "pool1", activation_relu));
126 topology.add(activation("relu3", "relu2", activation_relu));
127 topology.add(activation("relu4", "relu3", activation_relu));
128 topology.add(activation("relu5", "relu4", activation_relu));
131 bo.set_option(build_option::optimize_data(true));
133 network network(engine, topology, bo);
134 network.set_input_data("input", input);
135 auto outputs = network.execute();
// Expected peak with pooling in the pipe; would be larger without pooling
// buffer reuse through the pool.
137 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1088);
// Memory-pool test with two parallel branches off the same input:
//   input -> relu  -> relu1 -> relu4
//   input -> relu2 -> relu3 -> relu5 -> relu6 -> relu7
// Verifies pool behaviour when buffers from one branch must not be shared
// with live buffers of the other (see the original ASCII note below).
// Peak device allocation is pinned at 2048 bytes.
141 TEST(memory_pool, multi_outputs_network) {
142 // -- relu -- relu1 -- relu4
144 // -- relu2 -- relu3 -- relu5--relu6--relu7
145 // neither of relu5, relu6 nor relu7 can share resource with relu4.
147 // uncomment this line to disable memory pool
148 /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
149 engine engine{ cfg };*/
// Fresh engine so the usage counter is local to this test.
150 const cldnn::engine engine;// here we need new engine
152 auto feature_num = 4;
156 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
159 topology.add(input_layout("input", input.get_layout()));
160 topology.add(activation("relu", "input", activation_relu));
161 topology.add(activation("relu1", "relu", activation_relu));
162 topology.add(activation("relu2", "input", activation_relu));
163 topology.add(activation("relu3", "relu2", activation_relu));
164 topology.add(activation("relu4", "relu1", activation_relu));
165 topology.add(activation("relu5", "relu3", activation_relu));
166 topology.add(activation("relu6", "relu5", activation_relu));
167 topology.add(activation("relu7", "relu6", activation_relu));
170 bo.set_option(build_option::optimize_data(true));
172 network network(engine, topology, bo);
173 network.set_input_data("input", input);
174 auto outputs = network.execute();
176 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)2048);
// Memory-pool test under an out-of-order queue: engine is configured
// explicitly with oooq=true and mem_pool=true (see the /*oooq*/ and
// /*mem_pool*/ markers in the config below). The diamond-shaped graph of
// relus and concatenations must keep peak device allocation at 2816 bytes
// even though primitives may execute out of order.
180 TEST(memory_pool, oooq) {
181 /* -- relu1 - concat1- relu4 --
182 input< -- relu2 / >-- concat2 -- relu6
183 -- relu3 -- relu5 ---------
184 neither of relu5, relu6 nor relu7 can share resource with relu4. */
186 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
187 engine engine{ cfg };
189 auto feature_num = 4;
193 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
196 topology.add(input_layout("input", input.get_layout()));
197 topology.add(activation("relu1", "input", activation_relu));
198 topology.add(activation("relu2", "input", activation_relu));
199 topology.add(activation("relu3", "input", activation_relu));
200 topology.add(concatenation("concat1", { "relu1", "relu2"},concatenation::along_f));
201 topology.add(activation("relu4", "concat1", activation_relu));
202 topology.add(activation("relu5", "relu3", activation_relu));
203 topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
204 topology.add(activation("relu6", "concat2", activation_relu));
207 bo.set_option(build_option::optimize_data(true));
209 network network(engine, topology, bo);
210 network.set_input_data("input", input);
211 auto outputs = network.execute();
213 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
// Builds the same diamond topology into TWO networks on one engine and
// checks (a) peak memory after the first run is 2816 bytes, (b) peak grows
// only to 3584 bytes once a second network coexists with the first (pool
// sharing across networks), and (c) both networks produce element-wise
// identical "relu6" outputs, i.e. buffer sharing does not corrupt results.
216 TEST(memory_pool, shared_mem_pool_same_topology_twice) {
217 /* -- relu1 - concat1- relu4 --
218 input< -- relu2 | >-- concat2 -- relu6
219 -- relu3 -- relu5 ---------
220 neither of relu5, relu6 nor relu7 can share resource with relu4. */
222 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
223 engine engine{ cfg };
225 auto feature_num = 4;
229 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });
232 { 1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f,
233 5.0f, 2.0f, 2.0f, 3.0f, 5.0f, 2.0f, 2.0f, 3.0f, 1.1f, 2.4f, 1.0f, 1.0f, 4.0f, 6.0f, 3.0f, 3.6f,
234 4.0f, 6.0f, 3.0f, 3.0f, 1.0f, 1.0f, 1.5f, 1.0f, 4.0f, 6.5f, 3.0f, 3.0f, 4.0f, 6.0f, 1.8f, 3.5f,
235 3.0f, 5.0f, 1.0f, 1.0f, 1.3f, 1.0f, 0.4f, 1.3f, 4.0f, 7.0f, 3.0f, 3.0f, 1.0f, 2.0f, 3.9f, 4.0f
239 topology.add(input_layout("input", input.get_layout()));
240 topology.add(activation("relu1", "input", activation_relu));
241 topology.add(activation("relu2", "input", activation_sqrt));
242 topology.add(activation("relu3", "input", activation_square));
243 topology.add(concatenation("concat1", { "relu1", "relu2" }, concatenation::along_f));
244 topology.add(activation("relu4", "concat1", activation_relu));
245 topology.add(activation("relu5", "relu3", activation_relu));
246 topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f));
247 topology.add(activation("relu6", "concat2", activation_linear, {1.0f, 0.5f}));
250 bo.set_option(build_option::optimize_data(true));
// First network: establish the baseline peak allocation.
252 network network_first(engine, topology, bo);
253 network_first.set_input_data("input", input);
254 auto outputs = network_first.execute();
256 auto output_memory_first = outputs.at("relu6").get_memory();
257 auto output_layout_first = output_memory_first.get_layout();
258 auto output_ptr_first = output_memory_first.pointer<float>();
260 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
// Second network from the same topology while the first is still alive:
// the pool lets it reuse most buffers, so the peak rises only to 3584.
262 network network_second(engine, topology, bo);
263 network_second.set_input_data("input", input);
264 auto outputs_second = network_second.execute();
266 auto output_memory_second = outputs_second.at("relu6").get_memory();
267 auto output_layout_second = output_memory_second.get_layout();
268 auto output_ptr_second = output_memory_second.pointer<float>();
270 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3584);
271 EXPECT_EQ(output_layout_first, output_layout_second);
// Compare the two runs element by element in bfyx order.
273 int y_size = output_layout_first.size.spatial[1];
274 int x_size = output_layout_first.size.spatial[0];
275 int f_size = output_layout_first.size.feature[0];
276 int b_size = output_layout_first.size.batch[0];
277 int f_offset = y_size*x_size;
278 int b_offset = f_size * f_offset;
279 for (int b = 0; b < b_size; ++b)
281 for (int f = 0; f < f_size; ++f)
283 for (int y = 0; y < y_size; ++y)
285 for (int x = 0; x < x_size; ++x)
287 int idx = b * b_offset + f * f_offset + y * x_size + x;
288 EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
// Same "two networks from one topology" check as above, but with a
// weighted primitive (convolution + softmax) so the shared "weights" data
// memory is also in play. Peak allocation: 824 bytes after the first run,
// 1224 bytes once the second network coexists; softmax outputs of the two
// runs must match element-wise.
295 TEST(memory_pool, shared_mem_pool_same_topology_twice_weights) {
297 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
298 engine engine{ cfg };
300 auto feature_num = 3;
304 auto input= memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } });
305 auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } });
307 std::vector<float> dummy_input_data_1 = {
308 /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f,
309 /*f1 xy*/ 0.48f, 0.05f, 0.35f, 1.0f, 1.0f, 0.51f, 0.51f, 0.13f, 0.86f, 0.10f, 0.29f, 0.53f, 0.99f, 0.4f, 0.3f, 0.1f,
310 /*f2 xy*/ 0.98f, 0.35f, 0.3f, 0.01f, 0.9f, 0.55f, 0.15f, 0.39f, 0.36f, 0.01f, 0.32f, 0.4f, 0.3f, 0.2f, 0.1f, 0.5f,
313 set_values(input, dummy_input_data_1);
314 set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });
317 input_layout("input", input.get_layout()),
318 data("weights", weights),
319 convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
320 softmax("softmax", "conv"));
323 bo.set_option(build_option::optimize_data(true));
// First network: baseline peak.
325 network network_first(engine, topology, bo);
326 network_first.set_input_data("input", input);
327 auto outputs = network_first.execute();
329 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)824);
331 auto output_memory_first = outputs.at("softmax").get_memory();
332 auto output_layout_first = output_memory_first.get_layout();
333 auto output_ptr_first = output_memory_first.pointer<float>();
// Second network on the same engine: peak rises only to 1224 thanks to
// pool reuse across networks.
335 network network_second(engine, topology, bo);
336 network_second.set_input_data("input", input);
337 auto outputs_second = network_second.execute();
339 auto output_memory_second = outputs_second.at("softmax").get_memory();
340 auto output_layout_second = output_memory_second.get_layout();
341 auto output_ptr_second = output_memory_second.pointer<float>();
343 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)1224);
344 EXPECT_EQ(output_layout_first, output_layout_second);
// Element-wise comparison of the two runs in bfyx order.
346 int y_size = output_layout_first.size.spatial[1];
347 int x_size = output_layout_first.size.spatial[0];
348 int f_size = output_layout_first.size.feature[0];
349 int b_size = output_layout_first.size.batch[0];
350 int f_offset = y_size * x_size;
351 int b_offset = f_size * f_offset;
352 for (int b = 0; b < b_size; ++b)
354 for (int f = 0; f < f_size; ++f)
356 for (int y = 0; y < y_size; ++y)
358 for (int x = 0; x < x_size; ++x)
360 int idx = b * b_offset + f * f_offset + y * x_size + x;
361 EXPECT_EQ(output_ptr_first[idx], output_ptr_second[idx]);
// Runs the conv+softmax topology first with batch=8, then rebuilds it with
// batch=1 via change_input_layout. Both EXPECTs pin the peak at the same
// 3928 bytes: the smaller batch-1 network must fit entirely inside memory
// the pool already allocated for the batch-8 run (no additional growth).
369 TEST(memory_pool, shared_mem_pool_diff_batches) {
371 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
372 engine engine{ cfg };
375 auto feature_num = 3;
378 auto dt = data_types::f32;
379 auto fmt = format::bfyx;
380 layout lay_batch_1 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) }};
381 layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }};
382 auto input_1 = memory::allocate(engine, lay_batch_1);
383 auto input_8 = memory::allocate(engine, lay_batch_8);
384 auto weights = memory::allocate(engine, { dt, fmt, { 1, 1, 3, 2 } });
// Random inputs: only memory accounting is checked here, not output values.
386 std::vector<float> dummy_input_data_1 = generate_random_1d<float>(batch_1*feature_num*inp_x_size*inp_y_size, 0, 1);
387 std::vector<float> dummy_input_data_8 = generate_random_1d<float>(batch_8*feature_num*inp_x_size*inp_y_size, 0, 1);
389 set_values(input_1, dummy_input_data_1);
390 set_values(input_8, dummy_input_data_8);
391 set_values(weights, { 0.10f, 0.2f, 0.1f, 0.2f, 0.1f, 0.2f });
394 input_layout("input", input_8.get_layout()),
395 data("weights", weights),
396 convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 }),
397 softmax("softmax", "conv"));
400 bo.set_option(build_option::optimize_data(true));
// Batch-8 run establishes the peak.
402 network network_first(engine, topo, bo);
403 network_first.set_input_data("input", input_8);
404 auto outputs = network_first.execute();
406 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);
408 topo.change_input_layout("input", input_1.get_layout());//change input layout to batch=1
// Batch-1 run must not allocate beyond the batch-8 peak.
410 network network_second(engine, topo, bo);
411 network_second.set_input_data("input", input_1);
412 auto outputs_second = network_second.execute();
414 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3928);
// Memory-pool test where two concatenation outputs share a dependency (a
// single constant data primitive feeds both concatenations). Asserts the
// peak device allocation is 256 bytes.
// NOTE(review): the arguments of the data/concatenation constructors are
// elided in this excerpt -- confirm the exact graph in the full file.
417 TEST(memory_pool, shared_dep_two_output) {
419 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
420 engine engine{ cfg };
422 auto feature_num = 1;
425 auto dt = data_types::f32;
426 auto fmt = format::bfyx;
427 layout lay_batch_1 = { dt, fmt,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_1)) } };
428 auto input_1 = memory::allocate(engine, lay_batch_1);
429 set_random_values<float>(input_1);
// Shared constant feeding both result_1_0 and result_2_0.
432 auto constant_0_0 = cldnn::data(
436 auto result_1_0 = cldnn::concatenation(
439 cldnn::concatenation::along_b
441 auto result_2_0 = cldnn::concatenation(
444 cldnn::concatenation::along_b
447 //build and execute network
449 topo.add(constant_0_0);
450 topo.add(result_1_0);
451 topo.add(result_2_0);
454 bo.set_option(build_option::optimize_data(true));
456 network network(engine, topo, bo);
457 auto outputs = network.execute();
458 EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)256);
// Correctness test ("intermidate" typo is part of the public test name --
// kept for test-filter compatibility): concatenates two 1x1x2x2 inputs
// along batch, reshapes to 8x1x1x1, crops single elements at batch offsets
// 0 and 1, and scales each by 1.0. With optimize_data(false) (no graph
// optimization of the intermediates), the two outputs must still read the
// first two elements of the first input: 1.0 and 2.0.
461 TEST(memory_pool, non_opt_intermidate_opt_after) {
463 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
464 engine engine{ cfg };
465 auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
466 auto input_layout2 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
468 auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
469 auto input_memory2 = cldnn::memory::allocate(engine, input_layout2);
// Scale factor of 1.0 -> the scale primitives are numerically identity.
470 auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
471 auto data_memory = cldnn::data("scale_mem", scale_memory);
473 set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f });
474 set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f });
475 set_values(scale_memory, { 1.0f});
477 auto reshape_tensor = cldnn::tensor(8, 1, 1, 1);
478 auto input = cldnn::input_layout("input1", input_layout1);
479 auto input2 = cldnn::input_layout("input2", input_layout2);
480 auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b);
481 auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor);
// Crops pick out batch elements 0 and 1 of the reshaped buffer.
482 auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 });
483 auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 });
484 auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
485 auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
487 auto topology = cldnn::topology(
// Optimization deliberately disabled: intermediates stay un-optimized.
497 bo.set_option(build_option::optimize_data(false));
498 network network(engine, topology, bo);
499 network.set_input_data("input1", input_memory1);
500 network.set_input_data("input2", input_memory2);
501 auto outputs = network.execute();
502 EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
504 auto out1 = outputs.at("elt1");
505 auto out2 = outputs.at("elt2");
507 auto out1_ptr = out1.get_memory().pointer<float>();
508 auto out2_ptr = out2.get_memory().pointer<float>();
509 EXPECT_EQ(out1_ptr[0], 1.0f);
510 EXPECT_EQ(out2_ptr[0], 2.0f);
// Correctness test for memory dependencies: a 1x2x2x2 input feeds two
// abs-activation branches; each branch crops one feature slice (offsets
// {0,0,0,0} and {0,1,0,0}), scales it by 1.0 (identity), and applies abs
// again. The outputs must therefore reproduce the two halves of the input
// exactly: {1..4} on "out3" and {5..8} on "out4" -- i.e. sharing pooled
// buffers between the branches must not corrupt either path.
// NOTE(review): the test's tail (closing brace) lies beyond this excerpt.
513 TEST(memory_pool, add_mem_dep_test) {
515 engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
516 engine engine{ cfg };
517 auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 });
519 auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
// Scale factor of 1.0 -> the scale primitives are numerically identity.
520 auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
521 auto data_memory = cldnn::data("scale_mem", scale_memory);
523 set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f,
524 5.0f, 6.0f, 7.0f, 8.0f});
525 set_values(scale_memory, { 1.0f });
528 auto input = cldnn::input_layout("input1", input_layout1);
529 auto actv1 = cldnn::activation("input_activ1", "input1", cldnn_activation_func::activation_abs);
530 auto actv2 = cldnn::activation("input_activ2", "input1", cldnn_activation_func::activation_abs);
// crop1 takes feature 0, crop2 takes feature 1 (offset {0,1,0,0}).
531 auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 });
532 auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 });
533 auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
534 auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
535 auto actv3 = cldnn::activation("out3", "elt1", cldnn_activation_func::activation_abs);
536 auto actv4 = cldnn::activation("out4", "elt2", cldnn_activation_func::activation_abs);
538 auto topology = cldnn::topology(
548 bo.set_option(build_option::optimize_data(true));
549 network network(engine, topology, bo);
550 network.set_input_data("input1", input_memory1);
551 auto outputs = network.execute();
552 EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
554 auto out1 = outputs.at("out3");
555 auto out2 = outputs.at("out4");
557 auto out1_ptr = out1.get_memory().pointer<float>();
558 auto out2_ptr = out2.get_memory().pointer<float>();
// Branch 1 must see feature slice 0 of the input, untouched.
559 EXPECT_EQ(out1_ptr[0], 1.0f);
560 EXPECT_EQ(out1_ptr[1], 2.0f);
561 EXPECT_EQ(out1_ptr[2], 3.0f);
562 EXPECT_EQ(out1_ptr[3], 4.0f);
// Branch 2 must see feature slice 1 of the input, untouched.
564 EXPECT_EQ(out2_ptr[0], 5.0f);
565 EXPECT_EQ(out2_ptr[1], 6.0f);
566 EXPECT_EQ(out2_ptr[2], 7.0f);
567 EXPECT_EQ(out2_ptr[3], 8.0f);