/*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Expected output:
// [ 8.0, 0.0, 0.0, 4.0, 0.5, -0.5, -0.5, -0.5 ]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 8, 3, 3 } });
// Expected output:
// [ 2.0]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
EXPECT_EQ(2.0f, output_ptr[0]);
}
+TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
+ // Brief test description.
+ //
+ // Pool mode: max
+ // Global pooling: true
+ // Padding: none
+ //
+ // Input data:
+ // [-0.5, 1.0, 0.5]
+ // [ 2.0, 1.5, -0.5]
+ // [ 0.0, -1.0, 0.5]
+ //
+ // Expected output:
+ // [ 2.0]
+
+ const auto& engine = get_test_engine();
+
+ auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 3, 3 } });
+
+ topology topology;
+ topology.add(input_layout("input_prim", input_prim.get_layout()));
+ topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));
+
+ network network(engine, topology);
+ set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
+ network.set_input_data("input_prim", input_prim);
+
+ auto outputs = network.execute();
+ EXPECT_EQ(outputs.size(), size_t(1));
+ EXPECT_EQ(outputs.begin()->first, "pool_prim");
+
+ auto output_prim = outputs.begin()->second.get_memory();
+
+ auto output_ptr = output_prim.pointer<float>();
+
+ EXPECT_EQ(2.0f, output_ptr[0]);
+}
+
TEST(pooling_forward_gpu, basic_max_pooling_int8) {
- engine engine;
+ const auto& engine = get_test_engine();
layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
TEST(pooling_forward_gpu, basic_avg_pooling_int8) {
- engine engine;
+ const auto& engine = get_test_engine();
layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
// [ 2.0, 1.5]
// [ 2.0, 1.5]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
// [ 2.0, 0.5]
// [ 0.5, 0.5]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
// [ 0.5, 1.0] [ 1.0, 0.5]
// [-0.5, 1.5] [ 1.0, 0.0]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 3 } });
// [ 1.5, -0.5]
// [ -1, 0.5]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
// [ 1.5, -0.5]
// [ 1, -0.5]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
// [ 1.0, 0.625]
// [ 1.625, 0.875]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
// [ 0.375, -0.125]
// [ -0.25, 0.125]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
// [ 0.177777, -0.133333]
// [ 0.333333, 0.55]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
// [ 0.375, 0.5]
// [ -0.125, -1.125]
- engine engine;
+ const auto& engine = get_test_engine();
auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
// [0, 0, 0, 0, 0, 0]
// [0, 0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// [0, 1, -0.5, 0, 0]
// [0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// [0, 0, 0, 0, 0, 0]
// [0, 0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// [0, 1, -0.5, 0]
// [0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// [0, 0, 0, 0, 0, 0]
// [0, 0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// [0, 12, 14, 16, 0]
// [0, 0, 0, 0, 0]
- engine engine;
+ const auto& engine = get_test_engine();
std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
// f1: b0: 10 11 b1: 21 23
- engine engine;
+ const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
// f1: b0: 10 11 b1: 21 23
- engine engine;
+ const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
// f1: b0: 10 11 b1: 21 23
- engine engine;
+ const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
// f1: b0: 10 11 b1: 21 23
- engine engine;
+ const auto& engine = get_test_engine();
auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
}
+TEST(pooling_forward_gpu, b_fs_yx_fsv4)
+{
+ int B_array[] = { 16, 4, 0 }; // Batch
+ int F_array[] = { 64, 2048, 0 }; // Features
+ int I_array[] = { 112, 7, 0 }; // Input MxM data sizes
+ int W_array[] = { 7, 3, 0 }; // Filter (a-ka weights) sizes
+ int S_array[] = { 1, 2, 0 }; // Strides
+ for (int j = 0; F_array[j]; j++) {
+ int in_B = B_array[j];
+
+ int in_F = F_array[j];
+
+ int in_X = I_array[j],
+ in_Y = in_X;
+
+ int W_X = W_array[j],
+ W_Y = W_X;
+
+ int S_X = S_array[j],
+ S_Y = S_X;
+
+ // Input data init
+ std::vector<char> Data(in_B * in_F * in_X * in_Y);
+ for (size_t i = 0; i < Data.size(); i++)
+ Data[i] = static_cast<char>(i);
+ std::vector<char> DataGold(Data);
+
+ // Expected "gold" output and IMAD output.
+ std::vector<char> vGoldOutput;
+ std::vector<char> vTestOutput;
+
+ engine engine;
+
+ // "Golden" Pooling
+ {
+ // Mem initialization
+ // This is user data, no kernels here
+ auto input = memory::allocate(engine,
+ { data_types::i8,
+ format::bfyx,
+ { in_B, in_F, in_X, in_Y } });
+ set_values(input, std::move(DataGold));
+
+ auto pool = pooling("pool_GOLD",
+ "input",
+ pooling_mode::max,
+ { 1, 1, W_X, W_Y }, // kernel_size
+ { 1, 1, S_X, S_Y }); // stride
+
+ // Create a topology with a simple Convolution layer
+ topology topology(input_layout("input", input.get_layout()),
+ pool);
+
+ // Network processing
+ network network(engine, topology);
+ network.set_input_data("input", input);
+ //network_exe(network, vGoldOutput, "pool_GOLD");
+ auto outputs = network.execute();
+ auto searchC = outputs.find("pool_GOLD");
+ ASSERT_FALSE(searchC == outputs.end());
+ auto output = outputs.begin()->second.get_memory();
+ auto output_ptr = output.pointer<char>();
+ vGoldOutput.reserve(output_ptr.size());
+ for (size_t i = 0; i < output_ptr.size(); i++)
+ vGoldOutput.push_back(output_ptr[i]);
+ }
+
+ //
+ // IMAD Pooling
+ //
+ {
+ topology topology;
+
+ // Mem initialization
+ // This is user data, no kernels here
+ auto input = memory::allocate(engine,
+ { data_types::i8,
+ format::bfyx,
+ { in_B, in_F, in_X, in_Y } });
+ set_values(input, std::move(Data));
+
+ // Add input to topology
+ topology.add(
+ input_layout("input", input.get_layout()));
+
+ // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+ topology.add(reorder("reorder_Swizzelled",
+ "input",
+ layout(data_types::i8,
+ format::b_fs_yx_fsv4,
+ { in_B, in_F, in_X, in_Y })));
+
+ // Add Convoluiton to topology
+ topology.add(pooling("pool_IMAD",
+ "reorder_Swizzelled",
+ pooling_mode::max,
+ { 1, 1, W_X, W_Y }, // kernel_size
+ { 1, 1, S_X, S_Y })); // stride
+
+ // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+ topology.add(reorder("reorder_UnSwizzelled",
+ "pool_IMAD",
+ layout(data_types::i8,
+ format::bfyx,
+ { in_B, in_F, in_X, in_Y })));
+
+ network network(engine, topology);
+ network.set_input_data("input", input);
+ //network_exe(network, vTestOutput, "reorder_UnSwizzelled");
+ auto outputs = network.execute();
+ auto searchC = outputs.find("reorder_UnSwizzelled");
+ ASSERT_FALSE(searchC == outputs.end());
+ auto output = outputs.begin()->second.get_memory();
+ auto output_ptr = output.pointer<char>();
+ vTestOutput.reserve(output_ptr.size());
+ for (size_t i = 0; i < output_ptr.size(); i++)
+ vTestOutput.push_back(output_ptr[i]);
+ }
+
+ // Result validation
+ ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+ for (size_t i = 0; i < vGoldOutput.size(); i++)
+ ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+
+ } // for (int j = 0; F_array[j]; i++)
+}
+
class pooling_test : public tests::generic_test
{