// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///////////////////////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>

#include <api/CPP/data.hpp>
#include <api/CPP/engine.hpp>
#include <api/CPP/input_layout.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/topology.hpp>
#include "api/CPP/memory.hpp"
#include "api/CPP/mutable_data.hpp"
#include "api/CPP/pooling.hpp"
#include "api/CPP/reorder.hpp"

#include "test_utils/float16.h"
#include "test_utils/test_utils.h"

#include <initializer_list>
#include <iostream>
#include <list>
#include <vector>

using namespace cldnn;
using namespace tests;
35 TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) {
36 // Brief test description.
44 // [ 0.5, -0.5, -0.5, -0.5, 0.5f, -0.5, -0.5f, -0.5 ]
45 // [ 1.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
46 // [ 2.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
47 // [ 3.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
48 // [ 4.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
49 // [ 5.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
50 // [ 6.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
51 // [ 7.0, 0.0, 0.0, 0.0, 0.5, -0.5, -0.5, -0.5 ]
52 // [ 8.0, 0.0, 0.0, 4.0, 0.5, -0.5, -0.5, -0.5 ]
55 // [ 8.0, 0.0, 0.0, 4,0, 0,5, -0.5, -0.5, -0.5 ]
57 const auto& engine = get_test_engine();
59 auto input_prim = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 8, 3, 3 } });
62 topology.add(input_layout("input_prim", input_prim.get_layout()));
63 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,3,3 }, { 1,1,1,1 }));
64 network network(engine, topology);
65 set_values(input_prim, { 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f,
66 1.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
67 2.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
68 3.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
69 4.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
70 5.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
71 6.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
72 7.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
73 8.0f, 0.0f, 0.0f, 4.0f, 0.5f, -0.5f, -0.5f, -0.5f });
74 network.set_input_data("input_prim", input_prim);
76 auto outputs = network.execute();
77 EXPECT_EQ(outputs.size(), size_t(1));
78 EXPECT_EQ(outputs.begin()->first, "pool_prim");
80 auto output_prim = outputs.begin()->second.get_memory();
82 auto output_ptr = output_prim.pointer<float>();
83 EXPECT_EQ(4.0f, output_ptr[3]);
86 TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
87 // Brief test description.
102 const auto& engine = get_test_engine();
104 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
107 topology.add(input_layout("input_prim", input_prim.get_layout()));
108 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,3,3 }, { 1,1,1,1 }));
110 network network(engine, topology);
111 set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
112 network.set_input_data("input_prim", input_prim);
114 auto outputs = network.execute();
115 EXPECT_EQ(outputs.size(), size_t(1));
116 EXPECT_EQ(outputs.begin()->first, "pool_prim");
118 auto output_prim = outputs.begin()->second.get_memory();
120 auto output_ptr = output_prim.pointer<float>();
122 EXPECT_EQ(2.0f, output_ptr[0]);
125 TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
126 // Brief test description.
129 // Global pooling: true
140 const auto& engine = get_test_engine();
142 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 3, 3 } });
145 topology.add(input_layout("input_prim", input_prim.get_layout()));
146 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));
148 network network(engine, topology);
149 set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
150 network.set_input_data("input_prim", input_prim);
152 auto outputs = network.execute();
153 EXPECT_EQ(outputs.size(), size_t(1));
154 EXPECT_EQ(outputs.begin()->first, "pool_prim");
156 auto output_prim = outputs.begin()->second.get_memory();
158 auto output_ptr = output_prim.pointer<float>();
160 EXPECT_EQ(2.0f, output_ptr[0]);
163 TEST(pooling_forward_gpu, basic_max_pooling_int8) {
165 const auto& engine = get_test_engine();
166 layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
167 layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
168 layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
169 std::initializer_list<float> input_f = { 1.0f, -2.5f, 3.1f, -4.0f, 5.03f, -6.99f, 7.0f, -8.0f, 9.5f };
170 std::list<float> final_results = { 9.0f };
172 // Allocate memory for input image.
173 auto input_memory = memory::allocate(engine, in_layout);
174 set_values(input_memory, input_f);
176 // Create input_layout description
177 // "input" - is the primitive id inside topology
178 input_layout input("input", in_layout);
181 // 1. input layout primitive.
183 // 2. reorder primitive with id "reorder_input"
184 reorder("reorder_input", input, byte_layout),
185 pooling("pool1", "reorder_input", pooling_mode::max, { 1,1,3,3 }, {1,1,1,1}),
186 reorder("reorder2", "pool1", out_layout)
193 build_option::outputs({ "reorder2" })
196 network.set_input_data("input", input_memory);
198 auto outputs = network.execute();
200 auto interm = outputs.at("reorder2").get_memory();
201 auto interm_ptr = interm.pointer<float>();
202 auto output_size = outputs.at("reorder2").get_memory().count();
203 unsigned int cntr = 0;
204 for (const auto& exp : final_results)
206 EXPECT_EQ(exp, interm_ptr[cntr++]);
210 TEST(pooling_forward_gpu, basic_avg_pooling_int8) {
212 const auto& engine = get_test_engine();
213 layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
214 layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
215 layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
216 std::initializer_list<float> input_f = { 2.0f, -2.5f, 5.1f, -4.0f, 8.03f, -6.99f, 17.0f, -8.0f, 19.5f };
217 auto final_result = 0.0f;
218 for (const auto& val : input_f)
220 final_result += (float)((char)val);
222 final_result /= input_f.size();
223 final_result = (float)((char)final_result);
224 // Allocate memory for input image.
225 auto input_memory = memory::allocate(engine, in_layout);
226 set_values(input_memory, input_f);
228 // Create input_layout description
229 // "input" - is the primitive id inside topology
230 input_layout input("input", in_layout);
233 // 1. input layout primitive.
235 // 2. reorder primitive with id "reorder_input"
236 reorder("reorder_input", input, byte_layout),
237 pooling("pool1", "reorder_input", pooling_mode::average, { 1,1,3,3 }, { 1,1,1,1 }),
238 reorder("reorder2", "pool1", out_layout)
245 build_option::outputs({ "reorder2" })
248 network.set_input_data("input", input_memory);
250 auto outputs = network.execute();
252 auto interm = outputs.at("reorder2").get_memory();
253 auto interm_ptr = interm.pointer<float>();
254 auto output_size = outputs.at("reorder2").get_memory().count();
255 unsigned int cntr = 0;
256 EXPECT_EQ(final_result, interm_ptr[0]);
259 TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) {
260 // Brief test description.
276 const auto& engine = get_test_engine();
278 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
281 topology.add(input_layout("input_prim", input_prim.get_layout()));
282 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,1,1 }));
284 network network(engine, topology);
285 set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
286 network.set_input_data("input_prim", input_prim);
288 auto outputs = network.execute();
289 EXPECT_EQ(outputs.size(), size_t(1));
290 EXPECT_EQ(outputs.begin()->first, "pool_prim");
292 auto output_prim = outputs.begin()->second.get_memory();
294 auto output_ptr = output_prim.pointer<float>();
296 EXPECT_EQ(2.0f, output_ptr[0]);
297 EXPECT_EQ(1.5f, output_ptr[1]);
298 EXPECT_EQ(2.0f, output_ptr[2]);
299 EXPECT_EQ(1.5f, output_ptr[3]);
302 TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) {
303 // Brief test description.
311 // [-0.25, 1.00, 0.50, 0.25]
312 // [ 2.00, 1.50, -0.50, -0.75]
313 // [ 0.00, -1.00, 0.50, 0.25]
314 // [ 0.50, -2.00, -1.50, -2.50]
320 const auto& engine = get_test_engine();
322 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
325 topology.add(input_layout("input_prim", input_prim.get_layout()));
326 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }));
328 network network(engine, topology);
329 set_values(input_prim, { -0.25f, 1.00f, 0.50f, 0.25f, 2.00f, 1.50f, -0.50f, -0.75f, 0.00f, -1.00f, 0.50f, 0.25f, 0.50f, -2.00f, -1.50f, -2.50f });
330 network.set_input_data("input_prim", input_prim);
332 auto outputs = network.execute();
333 EXPECT_EQ(outputs.size(), size_t(1));
334 EXPECT_EQ(outputs.begin()->first, "pool_prim");
336 auto output_prim = outputs.begin()->second.get_memory();
338 auto output_ptr = output_prim.pointer<float>();
340 EXPECT_EQ(2.0f, output_ptr[0]);
341 EXPECT_EQ(0.5f, output_ptr[1]);
342 EXPECT_EQ(0.5f, output_ptr[2]);
343 EXPECT_EQ(0.5f, output_ptr[3]);
346 TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) {
347 // Brief test description.
355 // FM: 0 BATCH: 0 FM: 1 BATCH: 0
356 // [-0.5, 0.5, 0.0] [-1.5, -0.5, 0.0]
357 // [ 1.0, -1.0, -2.0] [ 0.0, -1.0, 1.5]
358 // [-1.0, -0.5, -0.5] [-2.0, 1.0, -0.5]
360 // FM: 0 BATCH: 1 FM: 1 BATCH: 1
361 // [ 0.5, 0.0, -0.5] [ 0.0, 0.5, -0.5]
362 // [-2.0, -1.0, 1.0] [ 1.0, -1.0, 0.0]
363 // [-0.5, -1.0, 1.5] [ 0.5, -0.5, 0.0]
366 // FM: 0 BATCH: 0 FM: 1 BATCH: 0
367 // [ 1.0, 0.5] [ 0.0, 1.5]
368 // [ 1.0, -0.5] [ 1.0, 1.5]
370 // FM: 0 BATCH: 1 FM: 1 BATCH: 1
371 // [ 0.5, 1.0] [ 1.0, 0.5]
372 // [-0.5, 1.5] [ 1.0, 0.0]
374 const auto& engine = get_test_engine();
376 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 3 } });
379 topology.add(input_layout("input_prim", input_prim.get_layout()));
380 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,1,1 }));
382 network network(engine, topology);
383 set_values(input_prim, { -0.5f, 0.5f, -1.5f, 0.0f, 0.5f, 0.0f, -0.5f, 0.5f, 0.0f, -0.5f, 0.0f, -0.5f, 1.0f, -2.0f, 0.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -2.0f, 1.0f, 1.5f, 0.0f, -1.0f, -0.5f, -2.0f, 0.5f, -0.5f, -1.0f, 1.0f, -0.5f, -0.5f, 1.5f, -0.5f, 0.0f });
384 network.set_input_data("input_prim", input_prim);
386 auto outputs = network.execute();
387 EXPECT_EQ(outputs.size(), size_t(1));
388 EXPECT_EQ(outputs.begin()->first, "pool_prim");
390 auto output_prim = outputs.begin()->second.get_memory();
392 auto output_ptr = output_prim.pointer<float>();
394 EXPECT_EQ(1.0f, output_ptr[0]); EXPECT_EQ(0.0f, output_ptr[2]);
395 EXPECT_EQ(0.5f, output_ptr[4]); EXPECT_EQ(1.5f, output_ptr[6]);
396 EXPECT_EQ(1.0f, output_ptr[8]); EXPECT_EQ(1.0f, output_ptr[10]);
397 EXPECT_EQ(-0.5f, output_ptr[12]); EXPECT_EQ(1.5f, output_ptr[14]);
399 EXPECT_EQ(0.5f, output_ptr[1]); EXPECT_EQ(1.0f, output_ptr[3]);
400 EXPECT_EQ(1.0f, output_ptr[5]); EXPECT_EQ(0.5f, output_ptr[7]);
401 EXPECT_EQ(-0.5f, output_ptr[9]); EXPECT_EQ(1.0f, output_ptr[11]);
402 EXPECT_EQ(1.5f, output_ptr[13]); EXPECT_EQ(0.0f, output_ptr[15]);
405 TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) {
406 // Brief test description.
413 // Input offset : -1x-1
415 // [ padd, padd, padd, padd]
416 // [ padd, 1.5, -0.5, padd]
417 // [ padd, -1.0, 0.5, padd]
418 // [ padd, padd, padd, padd]
424 const auto& engine = get_test_engine();
426 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
429 topology.add(input_layout("input_prim", input_prim.get_layout()));
430 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0, 0, -1,-1 }));
432 network network(engine, topology);
433 set_values(input_prim, { 1.50f, -0.50f, -1.00f, 0.50f });
434 network.set_input_data("input_prim", input_prim);
436 auto outputs = network.execute();
437 EXPECT_EQ(outputs.size(), size_t(1));
438 EXPECT_EQ(outputs.begin()->first, "pool_prim");
440 auto output_prim = outputs.begin()->second.get_memory();
442 auto output_ptr = output_prim.pointer<float>();
443 EXPECT_EQ( 1.5f, output_ptr[0]);
444 EXPECT_EQ(-0.5f, output_ptr[1]);
445 EXPECT_EQ(-1.0f, output_ptr[2]);
446 EXPECT_EQ( 0.5f, output_ptr[3]);
449 TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) {
450 // Brief test description.
457 // Input offset : -1x-1
459 // [ padd, padd, padd, padd, padd]
460 // [ padd, 1.5, -1.0, -0.5, padd]
461 // [ padd, 1.0, -1.0, -1.0, padd]
462 // [ padd, -1.0, -1.0, -0.5, padd]
463 // [ padd, padd, padd, padd, padd]
469 const auto& engine = get_test_engine();
471 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
474 topology.add(input_layout("input_prim", input_prim.get_layout()));
475 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }));
477 network network(engine, topology);
479 set_values(input_prim, {
480 1.50f, -1.00f, -0.50f,
481 1.00f, -1.00f, -1.00f,
482 -1.00f, -1.00f, -0.50f
485 network.set_input_data("input_prim", input_prim);
487 auto outputs = network.execute();
488 EXPECT_EQ(outputs.size(), size_t(1));
489 EXPECT_EQ(outputs.begin()->first, "pool_prim");
491 auto output_prim = outputs.begin()->second.get_memory();
492 EXPECT_EQ((int)output_prim.get_layout().size.count(), 4);
494 auto output_ptr = output_prim.pointer<float>();
495 EXPECT_EQ(1.5f, get_value<float>(output_ptr, 0));
496 EXPECT_EQ(-0.5f, get_value<float>(output_ptr, 1));
497 EXPECT_EQ(1.0f, get_value<float>(output_ptr, 2));
498 EXPECT_EQ(-0.5f, get_value<float>(output_ptr, 3));
501 TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) {
502 // Brief test description.
518 const auto& engine = get_test_engine();
520 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
523 topology.add(input_layout("input_prim", input_prim.get_layout()));
524 topology.add(pooling("pool_prim", "input_prim", pooling_mode::average,{ 1,1,2,2 },{ 1,1,1,1 }));
526 network network(engine, topology);
527 set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 4.0f, -1.0f, 3.5f });
528 network.set_input_data("input_prim", input_prim);
530 auto outputs = network.execute();
531 EXPECT_EQ(outputs.size(), size_t(1));
532 EXPECT_EQ(outputs.begin()->first, "pool_prim");
534 auto output_prim = outputs.begin()->second.get_memory();
536 auto output_ptr = output_prim.pointer<float>();
538 EXPECT_EQ(1.0f, output_ptr[0]);
539 EXPECT_EQ(0.625f, output_ptr[1]);
540 EXPECT_EQ(1.625f, output_ptr[2]);
541 EXPECT_EQ(0.875f, output_ptr[3]);
544 TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) {
545 // Brief test description.
552 // Input offset : -1x-1
554 // [ padd, padd, padd, padd]
555 // [ padd, 1.5, -0.5, padd]
556 // [ padd, -1.0, 0.5, padd]
557 // [ padd, padd, padd, padd]
563 const auto& engine = get_test_engine();
565 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
568 topology.add(input_layout("input_prim", input_prim.get_layout()));
569 topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }));
571 network network(engine, topology);
572 set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
573 network.set_input_data("input_prim", input_prim);
575 auto outputs = network.execute();
576 EXPECT_EQ(outputs.size(), size_t(1));
577 EXPECT_EQ(outputs.begin()->first, "pool_prim");
579 auto output_prim = outputs.begin()->second.get_memory();
581 auto output_ptr = output_prim.pointer<float>();
582 EXPECT_EQ(0.375f, output_ptr[0]);
583 EXPECT_EQ(-0.125f, output_ptr[1]);
584 EXPECT_EQ(-0.25f, output_ptr[2]);
585 EXPECT_EQ(0.125f, output_ptr[3]);
588 TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad) {
589 // Test the corner case when average pooling window contains data from image, data from padding and data outside padding
596 // Input offset : -1x-1
598 // [ padd, padd, padd, padd, padd]
599 // [ padd, 1.5, -0.5, -1.0, padd]
600 // [ padd, 0.5, 0.1, 0.2, padd]
601 // [ padd, 0.9, 1.1, 2.2, padd]
602 // [ padd, padd, padd, padd, padd]
605 // [ 0.177777, -0.133333]
608 const auto& engine = get_test_engine();
610 auto input_prim = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
613 topology.add(input_layout("input_prim", input_prim.get_layout()));
614 topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,3,3 }, { 1,1,3,3 }, { 0,0,-1,-1 }));
616 network network(engine, topology);
618 std::vector<float> input_vec = { 1.5f, -0.5f, -1.0f, 0.5f, 0.1f, 0.2f, 0.9f, 1.1f, 2.2f };
619 set_values(input_prim, input_vec);
621 network.set_input_data("input_prim", input_prim);
623 auto outputs = network.execute();
624 EXPECT_EQ(outputs.size(), size_t(1));
625 EXPECT_EQ(outputs.begin()->first, "pool_prim");
627 auto output_prim = outputs.begin()->second.get_memory();
629 auto output_ptr = output_prim.pointer<float>();
631 EXPECT_NEAR(output_ptr[0], 0.177777f, 1e-05F);
632 EXPECT_NEAR(output_ptr[1], -0.133333f, 1e-05F);
633 EXPECT_NEAR(output_ptr[2], 0.333333f, 1e-05F);
634 EXPECT_NEAR(output_ptr[3], 0.55f, 1e-05F);
637 TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) {
638 // Brief test description.
645 // Input offset : -1x-1
647 // [ padd, padd, padd, padd]
648 // [ padd, 1.5, -0.5, 2.5]
649 // [ padd, -1.0, 0.5, 3.0]
650 // [ padd, 0.5, 0.0, -8.0]
656 const auto& engine = get_test_engine();
658 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
661 topology.add(input_layout("input_prim", input_prim.get_layout()));
662 topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }));
664 network network(engine, topology);
665 set_values(input_prim, { 1.5f, -0.5f, 2.5f, -1.0f, 0.5f, 3.0f, 0.5f, 0.0f, -8.0f });
666 network.set_input_data("input_prim", input_prim);
668 auto outputs = network.execute();
669 EXPECT_EQ(outputs.size(), size_t(1));
670 EXPECT_EQ(outputs.begin()->first, "pool_prim");
672 auto output_prim = outputs.begin()->second.get_memory();
673 EXPECT_EQ((int)output_prim.get_layout().size.count(), 4);
675 auto output_ptr = output_prim.pointer<float>();
676 EXPECT_EQ(0.375f, output_ptr[0]);
677 EXPECT_EQ(0.5f, output_ptr[1]);
678 EXPECT_EQ(-0.125f, output_ptr[2]);
679 EXPECT_EQ(-1.125f, output_ptr[3]);
682 TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_outpad2) {
683 // Brief test description.
690 // Input offset : -1x-1
692 // [ padd, padd, padd, padd]
693 // [ padd, 1.5, -0.5, padd]
694 // [ padd, -1.0, 0.5, padd]
695 // [ padd, padd, padd, padd]
698 // [0, 0, 0, 0, 0, 0]
699 // [0, 0, 0, 0, 0, 0]
700 // [ 0, 0, 0.375, -0.125, 0, 0]
701 // [ 0, 0, -0.25, 0.125, 0, 0]
702 // [0, 0, 0, 0, 0, 0]
703 // [0, 0, 0, 0, 0, 0]
705 const auto& engine = get_test_engine();
706 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
708 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
710 std::cout << "Testing format: " << format::order(*it) << std::endl;
712 tensor input_tensor( 1, 1, 2, 2 );
713 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
716 topology.add(input_layout("input_prim", input_prim.get_layout()));
717 topology.add(pooling("pool_prim", "input_prim", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,2,2 }, 0 }));
719 network network(engine, topology);
720 set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
721 network.set_input_data("input_prim", input_prim);
723 std::vector<float> expected = {
724 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
725 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
726 0.0f, 0.0f, 0.375f, -0.125f, 0.0f, 0.0f,
727 0.0f, 0.0f, -0.25f, 0.125f, 0.0f, 0.0f,
728 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
729 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
732 auto outputs = network.execute();
733 EXPECT_EQ(outputs.size(), size_t(1));
734 EXPECT_EQ(outputs.begin()->first, "pool_prim");
736 auto output_prim = outputs.begin()->second.get_memory();
737 auto output_ptr = output_prim.pointer<float>();
738 for (size_t i = 0; i < expected.size(); ++i) {
739 EXPECT_EQ(expected[i], output_ptr[i]);
744 TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_outpad2) {
745 // Brief test description.
752 // Input offset : -1x-1
754 // [ padd, padd, padd, padd, padd]
755 // [ padd, 1.5, -1.0, -0.5, padd]
756 // [ padd, 1.0, -1.0, -1.0, padd]
757 // [ padd, -1.0, -1.0, -0.5, padd]
758 // [ padd, padd, padd, padd, padd]
762 // [0, 1.5, -0.5, 0, 0]
763 // [0, 1, -0.5, 0, 0]
766 const auto& engine = get_test_engine();
767 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
769 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
771 std::cout << "Testing format: " << format::order(*it) << std::endl;
773 tensor input_tensor( 1, 1, 3, 3 );
774 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
777 topology.add(input_layout("input_prim", input_prim.get_layout()));
778 topology.add(pooling("pool_prim", "input_prim", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 }));
780 network network(engine, topology);
782 set_values(input_prim, {
783 1.50f, -1.00f, -0.50f,
784 1.00f, -1.00f, -1.00f,
785 -1.00f, -1.00f, -0.50f
788 network.set_input_data("input_prim", input_prim);
790 std::vector<float> expected = {
791 0.0f, 0.0f, 0.0f, 0.0f,
792 0.0f, 1.5f,-0.5f, 0.0f,
793 0.0f, 1.f, -0.5f, 0.0f,
794 0.0f, 0.0f, 0.0f, 0.0f,
797 auto outputs = network.execute();
798 EXPECT_EQ(outputs.size(), size_t(1));
799 EXPECT_EQ(outputs.begin()->first, "pool_prim");
801 auto output_prim = outputs.begin()->second.get_memory();
802 EXPECT_EQ((int)output_prim.get_layout().size.count(), 4);
803 EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 16);
805 auto output_ptr = output_prim.pointer<float>();
806 for (size_t i = 0; i < expected.size(); ++i) {
807 EXPECT_EQ(expected[i], output_ptr[i]);
812 TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_outpad2) {
813 // Brief test description.
819 // Input Padding: 2x1 (yx format) out of the reorder layer
821 // Input offset : -1x-1
823 // [ padd, padd, padd, padd]
824 // [ padd, 1.5, -0.5, padd]
825 // [ padd, -1.0, 0.5, padd]
826 // [ padd, padd, padd, padd]
829 // [0, 0, 0, 0, 0, 0]
830 // [0, 0, 0, 0, 0, 0]
831 // [ 0, 0, 0.375, -0.125, 0, 0]
832 // [ 0, 0, -0.25, 0.125, 0, 0]
833 // [0, 0, 0, 0, 0, 0]
834 // [0, 0, 0, 0, 0, 0]
836 const auto& engine = get_test_engine();
837 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
839 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
841 std::cout << "Testing format: " << format::order(*it) << std::endl;
843 tensor input_tensor( 1, 1, 2, 2 );
844 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
847 topology.add(input_layout("input_prim", input_prim.get_layout()));
848 topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding({ {0,0,1,2}, 0 })));
849 topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,2,2 }, 0 }));
851 network network(engine, topology);
852 set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f });
853 network.set_input_data("input_prim", input_prim);
855 std::vector<float> expected = {
856 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
857 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
858 0.0f, 0.0f, 0.375f, -0.125f, 0.0f, 0.0f,
859 0.0f, 0.0f, -0.25f, 0.125f, 0.0f, 0.0f,
860 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
861 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
864 auto outputs = network.execute();
865 EXPECT_EQ(outputs.size(), size_t(1));
866 EXPECT_EQ(outputs.begin()->first, "pool_prim");
868 auto output_prim = outputs.begin()->second.get_memory();
869 auto output_ptr = output_prim.pointer<float>();
870 for (size_t i = 0; i < expected.size(); ++i) {
871 EXPECT_EQ(expected[i], output_ptr[i]);
876 TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_outpad2) {
877 // Brief test description.
883 // Input Padding: 2x1 (yx format) out of the reorder layer
885 // Input offset : -1x-1
887 // [ padd, padd, padd, padd, padd]
888 // [ padd, 1.5, -1.0, -0.5, padd]
889 // [ padd, 1.0, -1.0, -1.0, padd]
890 // [ padd, -1.0, -1.0, -0.5, padd]
891 // [ padd, padd, padd, padd, padd]
899 const auto& engine = get_test_engine();
900 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
902 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
904 std::cout << "Testing format: " << format::order(*it) << std::endl;
906 tensor input_tensor( 1, 1, 3, 3 );
907 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
910 topology.add(input_layout("input_prim", input_prim.get_layout()));
911 topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })));
912 topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 }));
914 network network(engine, topology);
916 set_values(input_prim, {
917 1.50f, -1.00f, -0.50f,
918 1.00f, -1.00f, -1.00f,
919 -1.00f, -1.00f, -0.50f
922 network.set_input_data("input_prim", input_prim);
924 std::vector<float> expected = {
925 0.0f, 0.0f, 0.0f, 0.0f,
926 0.0f, 1.5f, -0.5f, 0.0f,
927 0.0f, 1.f, -0.5f, 0.0f,
928 0.0f, 0.0f, 0.0f, 0.0f,
931 auto outputs = network.execute();
932 EXPECT_EQ(outputs.size(), size_t(1));
933 EXPECT_EQ(outputs.begin()->first, "pool_prim");
935 auto output_prim = outputs.begin()->second.get_memory();
936 EXPECT_EQ((int)output_prim.get_layout().size.count(), 4);
937 EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 16);
939 auto output_ptr = output_prim.pointer<float>();
940 for (size_t i = 0; i < expected.size(); ++i) {
941 EXPECT_EQ(expected[i], output_ptr[i]);
946 TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_outpad2) {
947 // Brief test description.
953 // Input Padding: 2x1 (yx format) out of the reorder layer
955 // Input offset : 0x0
958 // [ 5, 1.5, -0.5, 6]
959 // [ 7, -1.0, 0.5, 8]
963 // [0, 0, 0, 0, 0, 0]
964 // [0, 0, 0, 0, 0, 0]
965 // [ 0, 0, 2.375, 3.125, 0, 0]
966 // [ 0, 0, 6.25, 7.875, 0, 0]
967 // [0, 0, 0, 0, 0, 0]
968 // [0, 0, 0, 0, 0, 0]
970 const auto& engine = get_test_engine();
971 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
973 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
975 std::cout << "Testing format: " << format::order(*it) << std::endl;
977 tensor input_tensor( 1, 1, 4, 4 );
978 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
981 topology.add(input_layout("input_prim", input_prim.get_layout()));
982 topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
983 topology.add(pooling("pool_prim", "reorder", pooling_mode::average, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,0,0 }, padding{ { 0,0,2,2 }, 0 }));
985 network network(engine, topology);
986 set_values(input_prim, {
988 5.f, 1.5f, -0.5f, 6.f,
989 7.f, -1.0f, 0.5f, 8.f,
990 9.f, 10.f, 11.f, 12.f});
991 network.set_input_data("input_prim", input_prim);
993 std::vector<float> expected = {
994 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
995 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
996 0.0f, 0.0f, 2.375f, 3.125f, 0.0f, 0.0f,
997 0.0f, 0.0f, 6.25f, 7.875f, 0.0f, 0.0f,
998 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
999 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1002 auto outputs = network.execute();
1003 EXPECT_EQ(outputs.size(), size_t(1));
1004 EXPECT_EQ(outputs.begin()->first, "pool_prim");
1006 auto output_prim = outputs.begin()->second.get_memory();
1007 auto output_ptr = output_prim.pointer<float>();
1008 for (size_t i = 0; i < expected.size(); ++i) {
1009 EXPECT_EQ(expected[i], output_ptr[i]);
1014 TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_outpad2) {
1015 // Brief test description.
1021 // Input Padding: 2x1 (yx format) out of the reorder layer
1023 // Input offset : 0x0
1026 // [ 6, 1.5, -1.0, -0.5, 7]
1027 // [ 8, 1.0, -1.0, -1.0, 9]
1028 // [ 10, -1.0, -1.0, -0.5, 11]
1029 // [ 12, 13, 14, 15, 16]
1034 // [0, 8, 1.5, 9, 0]
1035 // [0, 12, 14, 16, 0]
1038 const auto& engine = get_test_engine();
1039 std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
1041 for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
1043 std::cout << "Testing format: " << format::order(*it) << std::endl;
1045 tensor input_tensor( 1, 1, 5, 5 );
1046 auto input_prim = memory::allocate(engine, { data_types::f32, *it, input_tensor });
1049 topology.add(input_layout("input_prim", input_prim.get_layout()));
1050 topology.add(reorder("reorder", "input_prim", input_prim.get_layout().with_padding({ { 0, 0, 2, 1 }, 0 })));
1051 topology.add(pooling("pool_prim", "reorder", pooling_mode::max, { 1,1,2,2 }, { 1,1,2,2 }, { 0,0,-1,-1 }, padding{ { 0,0,1,1 }, 0 }));
1053 network network(engine, topology);
1055 set_values(input_prim, {
1056 1.f, 2.f, 3.f, 4.f, 5.f,
1057 6.f, 1.50f, -1.00f, -0.50f, 7.f,
1058 8.f, 1.00f, -1.00f, -1.00f, 9.f,
1059 10.f, -1.00f, -1.00f, -0.50f, 11.f,
1060 12.f, 13.f, 14.f, 15.f, 16.f
1063 network.set_input_data("input_prim", input_prim);
1065 std::vector<float> expected = {
1066 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1067 0.0f, 1.f, 3.f, 5.f, 0.0f,
1068 0.0f, 8.f, 1.5f, 9.f, 0.0f,
1069 0.0f, 12.f, 14.f, 16.0f, 0.0f,
1070 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
1073 auto outputs = network.execute();
1074 EXPECT_EQ(outputs.size(), size_t(1));
1075 EXPECT_EQ(outputs.begin()->first, "pool_prim");
1077 auto output_prim = outputs.begin()->second.get_memory();
1078 EXPECT_EQ((int)output_prim.get_layout().size.count(), 9);
1079 EXPECT_EQ((int)output_prim.get_layout().get_buffer_size().count(), 25);
1081 auto output_ptr = output_prim.pointer<float>();
1082 for (size_t i = 0; i < expected.size(); ++i) {
1083 EXPECT_EQ(expected[i], output_ptr[i]);
// Max pooling with argmax side-output: 2x2 window, stride 1x1, over a bfyx
// input of batch=2, feature=2, spatial 3x2.  The "arg_max" mutable_data buffer
// is written by the kernel with the flat input index of each selected maximum.
1088 TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
1093 // Forward Max Pooling Input:
1094 // f0: b0: 1 2 -10 b1: 0 0 -11
1095 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
1096 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
1097 // f1: b0: 7 8 16 b1: 12 9 17
// Expected pooled maxima:
1100 // f0: b0: 4 4 b1: 0.5 0
1101 // f1: b0: 8 16 b1: 12 17
// Expected argmax values (flat indices into the input buffer):
1104 // f0: b0: 4 4 b1: 15 13
1105 // f1: b0: 10 11 b1: 21 23
1108 const auto& engine = get_test_engine();
// Input tensor and a writable f32 buffer that will receive the argmax indices.
1110 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
1111 auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
1125 topology.add(input_layout("input", input.get_layout()));
// mutable_data exposes arg_max both as a topology node and as a host-readable buffer.
1126 topology.add(mutable_data("arg_max", arg_max));
1127 topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }));
1129 network network(engine, topology);
1131 network.set_input_data("input", input);
1133 auto outputs = network.execute();
1135 auto output = outputs.at("pooling").get_memory();
1136 auto output_ptr = output.pointer<float>();
1137 auto output_layout = output.get_layout();
1138 auto argmax_ptr = arg_max.pointer<float>();
// Output stays bfyx with b=2, f=2, y=1, x=2.
1140 EXPECT_EQ(output_layout.format, format::bfyx);
1141 EXPECT_EQ(output_layout.size.spatial[1], 1);
1142 EXPECT_EQ(output_layout.size.spatial[0], 2);
1143 EXPECT_EQ(output_layout.size.feature[0], 2);
1144 EXPECT_EQ(output_layout.size.batch[0], 2);
// Reference vectors (element values elided in this excerpt).
1146 std::vector<float> expected_argmax_vec = {
1153 std::vector<float> expected_output_vec = {
// Verify pooled values and their argmax indices element-wise.
1160 for (size_t i = 0; i < expected_output_vec.size(); ++i) {
1161 EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
1162 EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
// Same max_with_argmax scenario as above, but the input is routed through a
// reorder that adds 2x2 spatial padding; the pooling result (and argmax
// indices) must be unaffected by the padded input layout.
1166 TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
1170 // Input Padding : 2x2
1172 // Forward Max Pooling Input:
1173 // f0: b0: 1 2 -10 b1: 0 0 -11
1174 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
1175 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
1176 // f1: b0: 7 8 16 b1: 12 9 17
// Expected pooled maxima:
1179 // f0: b0: 4 4 b1: 0.5 0
1180 // f1: b0: 8 16 b1: 12 17
// Expected argmax values (flat indices into the input buffer):
1183 // f0: b0: 4 4 b1: 15 13
1184 // f1: b0: 10 11 b1: 21 23
1187 const auto& engine = get_test_engine();
1189 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
1190 auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
1204 topology.add(input_layout("input", input.get_layout()));
// Reorder adds 2x2 lower/upper spatial padding around the input data.
1205 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 2 }, 0 })));
1206 topology.add(mutable_data("arg_max", arg_max));
1207 topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }));
1209 network network(engine, topology);
1211 network.set_input_data("input", input);
1213 auto outputs = network.execute();
1215 auto output = outputs.at("pooling").get_memory();
1216 auto output_ptr = output.pointer<float>();
1217 auto output_layout = output.get_layout();
1218 auto argmax_ptr = arg_max.pointer<float>();
// Output shape must be identical to the unpadded case: b=2, f=2, y=1, x=2.
1220 EXPECT_EQ(output_layout.format, format::bfyx);
1221 EXPECT_EQ(output_layout.size.spatial[1], 1);
1222 EXPECT_EQ(output_layout.size.spatial[0], 2);
1223 EXPECT_EQ(output_layout.size.feature[0], 2);
1224 EXPECT_EQ(output_layout.size.batch[0], 2);
// Reference vectors (element values elided in this excerpt).
1226 std::vector<float> expected_argmax_vec = {
1233 std::vector<float> expected_output_vec = {
1240 for (size_t i = 0; i < expected_output_vec.size(); ++i) {
1241 EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
1242 EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
// max_with_argmax with 1x1 output padding on the pooling primitive: the pooled
// values land in the interior of a zero-padded output buffer, so the expected
// output vector below explicitly includes the surrounding zero rows/columns.
1246 TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
1250 // Output Padding : 2x2
1252 // Forward Max Pooling Input:
1253 // f0: b0: 1 2 -10 b1: 0 0 -11
1254 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
1255 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
1256 // f1: b0: 7 8 16 b1: 12 9 17
// Expected pooled maxima:
1259 // f0: b0: 4 4 b1: 0.5 0
1260 // f1: b0: 8 16 b1: 12 17
// Expected argmax values (flat indices into the input buffer):
1263 // f0: b0: 4 4 b1: 15 13
1264 // f1: b0: 10 11 b1: 21 23
1267 const auto& engine = get_test_engine();
1269 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
1270 auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
1284 topology.add(input_layout("input", input.get_layout()));
1285 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 2, 2 }, 0 })));
1286 topology.add(mutable_data("arg_max", arg_max));
// Last argument requests 1x1 spatial padding on the pooling output.
1287 topology.add(pooling("pooling", "reorder", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, padding({ 0, 0, 1, 1 }, 0)))
1289 network network(engine, topology);
1291 network.set_input_data("input", input);
1293 auto outputs = network.execute();
1295 auto output = outputs.at("pooling").get_memory();
1296 auto output_ptr = output.pointer<float>();
1297 auto output_layout = output.get_layout();
1298 auto argmax_ptr = arg_max.pointer<float>();
// Logical output size is unchanged by output padding: b=2, f=2, y=1, x=2.
1300 EXPECT_EQ(output_layout.format, format::bfyx);
1301 EXPECT_EQ(output_layout.size.spatial[1], 1);
1302 EXPECT_EQ(output_layout.size.spatial[0], 2);
1303 EXPECT_EQ(output_layout.size.feature[0], 2);
1304 EXPECT_EQ(output_layout.size.batch[0], 2);
1306 std::vector<float> expected_argmax_vec = {
// Expected buffer contents including the zero padding border; each 3x4 slab
// below is one (batch, feature) plane with the real 1x2 result in its middle.
1313 std::vector<float> expected_output_vec = {
1314 0.0f, 0.0f, 0.0f, 0.0f,
1315 0.0f, 4.0f, 4.0f, 0.0f,
1316 0.0f, 0.0f, 0.0f, 0.0f,
1317 0.0f, 0.0f, 0.0f, 0.0f,
1318 0.0f, 8.0f, 16.0f,0.0f,
1319 0.0f, 0.0f, 0.0f, 0.0f,
1320 0.0f, 0.0f, 0.0f, 0.0f,
1321 0.0f, 0.5f, 0.0f, 0.0f,
1322 0.0f, 0.0f, 0.0f, 0.0f,
1323 0.0f, 0.0f, 0.0f, 0.0f,
1324 0.0f, 12.0f, 17.0f, 0.0f,
1325 0.0f, 0.0f, 0.0f, 0.0f,
// Output is checked over the padded buffer; argmax over the unpadded result.
1328 for (size_t i = 0; i < expected_output_vec.size(); ++i) {
1329 EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
1332 for (size_t i = 0; i < expected_argmax_vec.size(); ++i) {
1333 EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
// max_with_argmax using the explicit-output-size pooling constructor: instead
// of deriving the output shape from window/stride, the caller pins it to
// tensor(2, 2, 2, 1).  Results must match the auto-computed-shape variant.
1337 TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
1342 // Forward Max Pooling Input:
1343 // f0: b0: 1 2 -10 b1: 0 0 -11
1344 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
1345 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
1346 // f1: b0: 7 8 16 b1: 12 9 17
// Expected pooled maxima:
1349 // f0: b0: 4 4 b1: 0.5 0
1350 // f1: b0: 8 16 b1: 12 17
// Expected argmax values (flat indices into the input buffer):
1353 // f0: b0: 4 4 b1: 15 13
1354 // f1: b0: 10 11 b1: 21 23
1357 const auto& engine = get_test_engine();
1359 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
1360 auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
1374 topology.add(input_layout("input", input.get_layout()));
1375 topology.add(mutable_data("arg_max", arg_max));
// Final { 2, 2, 2, 1 } argument is the user-supplied output size.
1376 topology.add(pooling("pooling", "input", "arg_max", pooling_mode::max_with_argmax, { 1, 1, 2, 2 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 2, 2, 2, 1 }));
1378 network network(engine, topology);
1380 network.set_input_data("input", input);
1382 auto outputs = network.execute();
1384 auto output = outputs.at("pooling").get_memory();
1385 auto output_ptr = output.pointer<float>();
1386 auto output_layout = output.get_layout();
1387 auto argmax_ptr = arg_max.pointer<float>();
// The forced output size must be honored: b=2, f=2, y=1, x=2.
1389 EXPECT_EQ(output_layout.format, format::bfyx);
1390 EXPECT_EQ(output_layout.size.spatial[1], 1);
1391 EXPECT_EQ(output_layout.size.spatial[0], 2);
1392 EXPECT_EQ(output_layout.size.feature[0], 2);
1393 EXPECT_EQ(output_layout.size.batch[0], 2);
// Reference vectors (element values elided in this excerpt).
1395 std::vector<float> expected_argmax_vec = {
1402 std::vector<float> expected_output_vec = {
1409 for (size_t i = 0; i < expected_output_vec.size(); ++i) {
1410 EXPECT_EQ(expected_output_vec[i], output_ptr[i]);
1411 EXPECT_EQ(expected_argmax_vec[i], argmax_ptr[i]);
// Shared driver for the average_no_padding tests below.
// Fills the input with all 1s, so every correct average is exactly 1 — this
// isolates the "don't count padding in the divisor" behavior: any padded zero
// leaking into the average would drag a result below 1.
// Parameters: fmt = memory format, output = expected output size,
// input = input size, window = pool window, stride = pool stride,
// offset = (negative) input offset, applied as padding via a reorder.
1415 template <class DataType>
1416 static void generic_average_wo_padding_test(format fmt, tensor output, tensor input, tensor window, tensor stride, tensor offset)
// DataType selects the data_types enum; only float and FLOAT16 are used.
1418 constexpr auto dt = std::is_same<DataType, float>::value ? data_types::f32 : data_types::f16;
// Skip (not fail) fp16 variants on devices without fp16 support.
1422 if (!eng.get_info().supports_fp16)
1424 if(dt == data_types::f16)
1430 auto input_mem = memory::allocate(eng, layout{ dt, fmt, input });
1431 set_values(input_mem, std::vector<DataType>(input.count(), DataType(1)));
// All-ones input => all-ones expected output for a correct padding-free average.
1432 std::vector<DataType> expected_output(output.count(), DataType(1));
1435 tpl.add(input_layout("in", input_mem.get_layout()));
// A non-zero offset is realized as input padding (negated offset) via reorder.
1437 auto pool_in = "in";
1438 if (offset != tensor())
1440 tpl.add(reorder("reorder", "in", input_mem.get_layout().with_padding(offset.negate().sizes())));
1441 pool_in = "reorder";
1443 tpl.add(pooling("pool", pool_in, pooling_mode::average_no_padding, window, stride, offset));
1445 network net(eng, tpl);
1446 net.set_input_data("in", input_mem);
1447 auto output_mem = net.execute().at("pool").get_memory();
// Check output element count and logical size, then every value == 1.
1449 ASSERT_TRUE(output_mem.count() == expected_output.size());
1450 EXPECT_TRUE(output_mem.get_layout().size == output);
1451 auto out_ptr = output_mem.pointer<DataType>();
1453 for (size_t i = 0; i < expected_output.size(); ++i)
1454 EXPECT_FLOAT_EQ(out_ptr[i], expected_output[i]);
// average_no_padding variants on bfyx, fp32 then fp16.  Argument order for
// generic_average_wo_padding_test: (format, expected output size, input size,
// window, stride, input offset).  Test-name suffixes: iNxN = input size,
// wNxN = window, sNxN = stride, oNxN = offset magnitude.
1458 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s2x2)
1460 generic_average_wo_padding_test<float>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{});
1463 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s2x2_o1x1)
1465 generic_average_wo_padding_test<float>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1468 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s3x3_o1x1)
1470 generic_average_wo_padding_test<float>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(3, 3), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1473 TEST(pooling_forward_gpu, bfyx_average_without_padding_i1x1_w3x3_s1x1_o1x1)
1475 generic_average_wo_padding_test<float>(format::bfyx, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
// fp16 versions of the four cases above (skipped if the device lacks fp16).
1479 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s2x2_fp16)
1481 generic_average_wo_padding_test<FLOAT16>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{});
1484 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s2x2_o1x1_fp16)
1486 generic_average_wo_padding_test<FLOAT16>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1489 TEST(pooling_forward_gpu, bfyx_average_without_padding_i3x3_w2x2_s3x3_o1x1_fp16)
1491 generic_average_wo_padding_test<FLOAT16>(format::bfyx, spatial(2, 2), spatial(3, 3), spatial(3, 3), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1494 TEST(pooling_forward_gpu, bfyx_average_without_padding_i1x1_w3x3_s1x1_o1x1_fp16)
1496 generic_average_wo_padding_test<FLOAT16>(format::bfyx, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
// Same average_no_padding matrix as the bfyx group above, repeated for the
// yxfb format (fp32 then fp16).
1500 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s2x2)
1502 generic_average_wo_padding_test<float>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{});
1505 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s2x2_o1x1)
1507 generic_average_wo_padding_test<float>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1510 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s3x3_o1x1)
1512 generic_average_wo_padding_test<float>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(3, 3), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1515 TEST(pooling_forward_gpu, yxfb_average_without_padding_i1x1_w3x3_s1x1_o1x1)
1517 generic_average_wo_padding_test<float>(format::yxfb, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
// fp16 versions (skipped if the device lacks fp16).
1521 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s2x2_fp16)
1523 generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{});
1526 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s2x2_o1x1_fp16)
1528 generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(2, 2), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1531 TEST(pooling_forward_gpu, yxfb_average_without_padding_i3x3_w2x2_s3x3_o1x1_fp16)
1533 generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(2, 2), spatial(3, 3), spatial(3, 3), tensor{ 0,0,2,2 }, tensor{ 0,0,-1,-1 });
1536 TEST(pooling_forward_gpu, yxfb_average_without_padding_i1x1_w3x3_s1x1_o1x1_fp16)
1538 generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
// Cross-checks the IMAD pooling path on the b_fs_yx_fsv4 layout against a
// reference ("gold") pooling run on the same i8 data.  For each (B, F, size,
// window, stride) configuration, both paths pool the same byte pattern and the
// raw outputs must match element-for-element.
1541 TEST(pooling_forward_gpu, b_fs_yx_fsv4)
// Parallel arrays of test configurations; a 0 entry terminates the loop.
1543 int B_array[] = { 16, 4, 0 }; // Batch
1544 int F_array[] = { 64, 2048, 0 }; // Features
1545 int I_array[] = { 112, 7, 0 }; // Input MxM data sizes
1546 int W_array[] = { 7, 3, 0 }; // Filter (a.k.a. weights) sizes
1547 int S_array[] = { 1, 2, 0 }; // Strides
1548 for (int j = 0; F_array[j]; j++) {
1549 int in_B = B_array[j];
1551 int in_F = F_array[j];
1553 int in_X = I_array[j],
1556 int W_X = W_array[j],
1559 int S_X = S_array[j],
// Deterministic byte pattern shared by both paths (wraps via char cast).
1563 std::vector<char> Data(in_B * in_F * in_X * in_Y);
1564 for (size_t i = 0; i < Data.size(); i++)
1565 Data[i] = static_cast<char>(i);
1566 std::vector<char> DataGold(Data);
1568 // Expected "gold" output and IMAD output.
1569 std::vector<char> vGoldOutput;
1570 std::vector<char> vTestOutput;
// --- Gold (reference) path ---
1576 // Mem initialization
1577 // This is user data, no kernels here
1578 auto input = memory::allocate(engine,
1581 { in_B, in_F, in_X, in_Y } });
1582 set_values(input, std::move(DataGold));
1584 auto pool = pooling("pool_GOLD",
1587 { 1, 1, W_X, W_Y }, // kernel_size
1588 { 1, 1, S_X, S_Y }); // stride
1590 // Create a topology with a simple pooling layer
1591 topology topology(input_layout("input", input.get_layout()),
1594 // Network processing
1595 network network(engine, topology);
1596 network.set_input_data("input", input);
1597 //network_exe(network, vGoldOutput, "pool_GOLD");
1598 auto outputs = network.execute();
1599 auto searchC = outputs.find("pool_GOLD");
1600 ASSERT_FALSE(searchC == outputs.end());
// Copy reference output bytes out of device memory for later comparison.
1601 auto output = outputs.begin()->second.get_memory();
1602 auto output_ptr = output.pointer<char>();
1603 vGoldOutput.reserve(output_ptr.size());
1604 for (size_t i = 0; i < output_ptr.size(); i++)
1605 vGoldOutput.push_back(output_ptr[i]);
// --- IMAD path: reorder to b_fs_yx_fsv4, pool, reorder back ---
1614 // Mem initialization
1615 // This is user data, no kernels here
1616 auto input = memory::allocate(engine,
1619 { in_B, in_F, in_X, in_Y } });
1620 set_values(input, std::move(Data));
1622 // Add input to topology
1624 input_layout("input", input.get_layout()));
1626 // Reorder (a.k.a. swizzling) input to MMAD/IMAD pooling format
1627 topology.add(reorder("reorder_Swizzelled",
1629 layout(data_types::i8,
1630 format::b_fs_yx_fsv4,
1631 { in_B, in_F, in_X, in_Y })));
1633 // Add pooling to topology
1634 topology.add(pooling("pool_IMAD",
1635 "reorder_Swizzelled",
1637 { 1, 1, W_X, W_Y }, // kernel_size
1638 { 1, 1, S_X, S_Y })); // stride
1640 // Back reordering (a.k.a. unswizzling) output from MMAD/IMAD pooling
1641 topology.add(reorder("reorder_UnSwizzelled",
1643 layout(data_types::i8,
1645 { in_B, in_F, in_X, in_Y })));
1647 network network(engine, topology);
1648 network.set_input_data("input", input);
1649 //network_exe(network, vTestOutput, "reorder_UnSwizzelled");
1650 auto outputs = network.execute();
1651 auto searchC = outputs.find("reorder_UnSwizzelled");
1652 ASSERT_FALSE(searchC == outputs.end());
1653 auto output = outputs.begin()->second.get_memory();
1654 auto output_ptr = output.pointer<char>();
1655 vTestOutput.reserve(output_ptr.size());
1656 for (size_t i = 0; i < output_ptr.size(); i++)
1657 vTestOutput.push_back(output_ptr[i]);
// Both paths must agree byte-for-byte.
1660 // Result validation
1661 ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
1662 for (size_t i = 0; i < vGoldOutput.size(); i++)
1663 ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
1665 } // for (int j = 0; F_array[j]; j++)
// Parameterized pooling fixture built on tests::generic_test: it generates a
// cartesian product of pooling configurations (mode x window x stride, with
// optional input offset and input/output padding) and validates each GPU run
// against a CPU reference computed in generate_reference_typed().
1669 class pooling_test : public tests::generic_test
// Frees the heap-allocated parameter objects accumulated by the generators.
1674 static void TearDownTestCase()
1676 for (auto generic_params : all_generic_params)
1678 delete generic_params;
1681 for (auto layer_params : all_layer_params)
1683 delete layer_params;
// Builds a negative input offset, clamped to window_size-1 per axis so the
// pooling window always still overlaps the input.
1687 static tensor generate_input_offset(int x, int y, const tensor& window_size)
1689 return tensor(0, 0, -std::min(x, window_size.spatial[0] - 1), -std::min(y, window_size.spatial[1] - 1));
// Enumerates the layer-parameter variants: for every (mode, window, stride)
// combination, plain / offset / input-padded ("reorder0") / output-padded /
// both-padded pooling primitives, plus one dedicated bfyx-average-opt case.
1692 static std::vector<cldnn::primitive*> generate_specific_test_params()
1694 std::vector<pooling_mode> pooling_modes = { pooling_mode::max, pooling_mode::average, pooling_mode::average_no_padding };
1696 std::vector<tensor> sizes = { tensor(1, 1, 2, 2 ), tensor(1, 1, 3, 3), tensor(1, 1, 7, 4) };
1698 std::vector<tensor> strides = { tensor(1, 1, 1, 1), tensor(1, 1, 2, 2), tensor(1, 1, 4, 3) };
1700 for (auto pooling_mode : pooling_modes)
1702 for (auto size : sizes)
1704 for (auto stride : strides)
1707 all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride));
1708 all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(4, 3, size)));
// "reorder0" as the input means the generic framework inserts input padding.
1711 all_layer_params.push_back(new pooling("pooling", "reorder0", pooling_mode, size, stride));
1714 all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 1, 5 },{ 0, 0, 19, 4 } }));
1716 // Input + output padding
1717 all_layer_params.push_back(new pooling("pooling", "reorder0", pooling_mode, size, stride, generate_input_offset(2, 3, size), { { 0, 0, 2, 1 },{ 0, 0, 3, 4 } }));
1722 // This case tests the pooling_gpu_bfyx_average_opt kernel.
1723 all_layer_params.push_back(new pooling("pooling", "input0", pooling_mode::average, tensor(1, 1, 3, 3), tensor(1, 1, 1, 1), generate_input_offset(1, 1, tensor(1, 1, 3, 3))));
1725 return all_layer_params;
1728 static std::vector<tests::test_params*> generate_generic_test_params()
1730 return generic_test::generate_generic_test_params(all_generic_params);
// Only the three classic 4-D formats are exercised by this fixture.
1733 virtual bool is_format_supported(cldnn::format format)
1735 if ((format == cldnn_format_type::cldnn_format_yxfb) || (format == cldnn_format_type::cldnn_format_bfyx) || (format == cldnn_format_type::cldnn_format_byxf))
// Dispatches random-input generation by data type (f32 vs FLOAT16).
1742 virtual void prepare_input_for_test(std::vector<cldnn::memory>& inputs)
1744 if (generic_params->data_type == data_types::f32)
1746 prepare_input_for_test_typed<float>(inputs);
1750 prepare_input_for_test_typed<FLOAT16>(inputs);
// Fills inputs[0] with random values in [-10, 10]; k controls the value
// granularity (finer for f32 than for f16).
1754 template<typename Type>
1755 void prepare_input_for_test_typed(std::vector<cldnn::memory>& inputs)
1757 int k = (generic_params->data_type == data_types::f32) ? 8 : 4;
1758 auto input = inputs[0];
1759 auto input_size = inputs[0].get_layout().size;
1760 VVVVF<Type> input_rnd = generate_random_4d<Type>(input_size.batch[0], input_size.feature[0], input_size.spatial[1], input_size.spatial[0], -10, 10, k);
1761 VF<Type> input_rnd_vec = flatten_4d<Type>(input.get_layout().format, input_rnd);
1762 set_values(input, input_rnd_vec);
// Computes the expected output tensor from input size, offset, window and
// stride (ceil-mode formula), then shrinks until the last window starts
// strictly inside the image.
1765 virtual cldnn::tensor get_expected_output_tensor()
1767 const cldnn::pooling* pooling = (cldnn::pooling*)layer_params;
1769 int batch = generic_params->input_layouts[0].size.batch[0];
1770 int feature = generic_params->input_layouts[0].size.feature[0];
1771 int height = generic_params->input_layouts[0].size.spatial[1];
1772 int width = generic_params->input_layouts[0].size.spatial[0];
1774 int input_offset_height = pooling->input_offset.spatial[1];
1775 int input_offset_width = pooling->input_offset.spatial[0];
1777 int kernel_height = pooling->size.spatial[1];
1778 int kernel_width = pooling->size.spatial[0];
1780 int stride_height = pooling->stride.spatial[1];
1781 int stride_width = pooling->stride.spatial[0];
// Offsets are negative, so "- 2 * offset" enlarges the effective input.
1783 int pooled_height = (int)(ceil((float)std::max(height - 2 * input_offset_height - kernel_height, 0) / stride_height)) + 1;
1784 int pooled_width = (int)(ceil((float)std::max(width - 2 * input_offset_width - kernel_width, 0) / stride_width)) + 1;
1786 // Make sure that the last pooling starts strictly inside the image.
1787 while ((pooled_height - 1) * stride_height >= height - input_offset_height)
1791 while ((pooled_width - 1) * stride_width >= width - input_offset_width)
1796 return cldnn::tensor(batch, feature, pooled_width, pooled_height);
// CPU reference implementation: allocates a bfyx output (with the layer's
// output padding) and computes max / average / average_no_padding pooling.
1799 template<typename Type>
1800 memory generate_reference_typed(const std::vector<cldnn::memory>& inputs)
1802 const cldnn::pooling* pooling = (cldnn::pooling*)layer_params;
1804 int batch = inputs[0].get_layout().size.batch[0];
1805 int feature = inputs[0].get_layout().size.feature[0];
1806 int height = inputs[0].get_layout().size.spatial[1];
1807 int width = inputs[0].get_layout().size.spatial[0];
1811 cldnn::pooling_mode pooling_mode = pooling->mode;
1813 int input_offset_width = pooling->input_offset.spatial[0];
1814 int input_offset_height = pooling->input_offset.spatial[1];
1816 int kernel_width = pooling->size.spatial[0];
1817 int kernel_height = pooling->size.spatial[1];
1819 int stride_width = pooling->stride.spatial[0];
1820 int stride_height = pooling->stride.spatial[1];
1822 auto output_tensor = get_expected_output_tensor();
1824 int pooled_width = output_tensor.spatial[0];
1825 int pooled_height = output_tensor.spatial[1];
1828 auto output = memory::allocate(engine, cldnn::layout(inputs[0].get_layout().data_type, cldnn::format::bfyx, output_tensor, pooling->output_padding));
1830 auto input_mem = inputs[0].pointer<Type>();
1831 auto output_mem = output.pointer<Type>();
// Buffer dims include output padding; used for manual index math below.
1833 int output_width = output.get_layout().get_buffer_size().spatial[0];
1834 int output_height = output.get_layout().get_buffer_size().spatial[1];
1836 const auto input_desc = get_linear_memory_desc(inputs[0].get_layout());
1837 const auto output_desc = get_linear_memory_desc(output.get_layout());
1839 switch (pooling_mode)
1841 case cldnn::pooling_mode::max:
// Seed the whole buffer with the type's effective minimum
// (-65504 is the most negative finite fp16 value).
1843 for (int i = 0; i < (int)output.get_layout().get_buffer_size().count(); i++)
1845 output_mem[i] = (generic_params->data_type == data_types::f32) ? -FLT_MAX : -65504;
1847 for (int b = 0; b < batch; b++)
1849 for (int f = 0; f < feature; f++)
1851 for (int h = 0; h < pooled_height; h++)
1853 for (int w = 0; w < pooled_width; w++)
// Window bounds clipped to the input; offset can make the start negative.
1855 int input_offset_x_start = w * stride_width + input_offset_width;
1856 int input_offset_x_end = std::min(input_offset_x_start + kernel_width, width);
1857 input_offset_x_start = std::max(input_offset_x_start, 0);
1859 int input_offset_y_start = h * stride_height + input_offset_height;
1860 int input_offset_y_end = std::min(input_offset_y_start + kernel_height, height);
1861 input_offset_y_start = std::max(input_offset_y_start, 0);
1863 const size_t output_index = get_linear_index(output.get_layout(), b, f, h, w, output_desc);
1865 for (int y = input_offset_y_start; y < input_offset_y_end; y++)
1867 for (int x = input_offset_x_start; x < input_offset_x_end; x++)
1869 const size_t input_index = get_linear_index(inputs[0].get_layout(), b, f, y, x, input_desc);
1871 if (input_mem[input_index] > output_mem[output_index])
1873 output_mem[output_index] = input_mem[input_index];
1883 case cldnn::pooling_mode::average:
1884 case cldnn::pooling_mode::average_no_padding:
1886 auto kernel_size = kernel_width * kernel_height;
// dynamic_mode: true when windows can extend past the (offset-enlarged) input.
// NOTE(review): the second operand uses input_offset_width with `height`;
// it presumably should be input_offset_height — confirm against the kernel
// behavior before changing.
1887 auto dynamic_mode = (((output_tensor.spatial[0] - 1) * stride_width) + pooling->size.spatial[0]) > -2 * input_offset_width + width ||
1888 (((output_tensor.spatial[1] - 1) * stride_height) + pooling->size.spatial[1]) > -2 * input_offset_width + height;
// Divisor for partial windows: number of kernel cells that fall inside the
// offset-extended input, given the window's top-left corner.
1890 auto divider = [=](int actual_x, int actual_y) {
1891 auto x = kernel_width;
1892 auto y = kernel_height;
1895 if (actual_x + kernel_width > width + std::abs(input_offset_width))
1897 x = (width + std::abs(input_offset_width)) - actual_x;
1899 if (actual_y + kernel_height > height + std::abs(input_offset_height))
1901 y = (height + std::abs(input_offset_height)) - actual_y;
1907 for (int i = 0; i < (int)output.get_layout().get_buffer_size().count(); i++)
1911 for (int b = 0; b < batch; b++)
1913 for (int f = 0; f < feature; f++)
1915 for (int h = 0; h < pooled_height; h++)
1917 for (int w = 0; w < pooled_width; w++)
1919 int input_offset_x_start = w * stride_width + input_offset_width;
1920 int input_offset_x_end = std::min(input_offset_x_start + kernel_width, width);
1921 input_offset_x_start = std::max(input_offset_x_start, 0);
1923 int input_offset_y_start = h * stride_height + input_offset_height;
1924 int input_offset_y_end = std::min(input_offset_y_start + kernel_height, height);
1925 input_offset_y_start = std::max(input_offset_y_start, 0);
// Manual linear index into the padded bfyx output buffer.
1927 int output_index = (b * feature + f) * output_height * output_width;
1928 tensor lower_padding = pooling->output_padding.lower_size();
1929 output_index += (lower_padding.spatial[1] + h) * output_width + lower_padding.spatial[0] + w;
1931 int num_of_elements = 0;
1932 for (int y = input_offset_y_start; y < input_offset_y_end; y++)
1934 for (int x = input_offset_x_start; x < input_offset_x_end; x++)
1936 const size_t input_index = get_linear_index(inputs[0].get_layout(), b, f, y, x, input_desc);
1937 output_mem[output_index] += input_mem[input_index];
1938 if (!dynamic_mode || pooling_mode == cldnn::pooling_mode::average_no_padding)
// Plain average with partial windows divides by the divider() count instead
// of the in-bounds element count.
1944 if (pooling_mode == cldnn::pooling_mode::average)
1946 num_of_elements = divider(input_offset_x_start, input_offset_y_start);
1948 if (num_of_elements == 0)
1953 output_mem[output_index] /= (Type)num_of_elements;
// Type dispatch for the CPU reference (f32 vs FLOAT16).
1970 virtual memory generate_reference(const std::vector<cldnn::memory>& inputs)
1972 if (generic_params->data_type == data_types::f32)
1974 return generate_reference_typed<float>(inputs);
1978 return generate_reference_typed<FLOAT16>(inputs);
// Static parameter lists shared by the generators and freed in TearDownTestCase.
1984 static std::vector<tests::test_params*> all_generic_params;
1985 static std::vector<cldnn::primitive*> all_layer_params;
1989 std::vector<cldnn::primitive*> pooling_test::all_layer_params = {};
1990 std::vector<tests::test_params*> pooling_test::all_generic_params = {};
// Parameterized entry point; the body delegates to the generic_test runner.
1992 TEST_P(pooling_test, POOLING)
// Instantiation over (generic params) x (layer params).  The DISABLED_ prefix
// means this exhaustive suite is skipped by default; run it explicitly with
// --gtest_also_run_disabled_tests.
1997 INSTANTIATE_TEST_CASE_P(DISABLED_POOLING,
1999 ::testing::Combine(::testing::ValuesIn(pooling_test::generate_generic_test_params()),
2000 ::testing::ValuesIn(pooling_test::generate_specific_test_params())),
2001 tests::generic_test::custom_param_name_functor());