// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///////////////////////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include "api/CPP/memory.hpp"
#include <api/CPP/input_layout.hpp>
#include "api/CPP/permute.hpp"
#include "api/CPP/reorder.hpp"
#include <api/CPP/topology.hpp>
#include <api/CPP/network.hpp>
#include <api/CPP/engine.hpp>
#include "test_utils/test_utils.h"
#include <api/CPP/data.hpp>
#include <api/CPP/fully_connected.hpp>
#include <api/CPP/reshape.hpp>
#include <api/CPP/crop.hpp>

#include <gmock/gmock.h>

using namespace cldnn;
using namespace tests;
using namespace testing;
40 TEST(permute_gpu_f32, output_ordering_test)
42 const auto& engine = get_test_engine();
45 std::vector<std::vector<int32_t>> input_tensors =
47 { 10, 5, 15, 2 },{ 2, 4, 6, 8 },{ 2, 2, 3, 2 },{ 9, 8, 7, 4 }
49 std::vector<std::vector<uint16_t>> permutations =
51 { 0, 1, 2, 3 }, //do nothing
52 { 0, 1, 3, 2 }, //replace x with y
53 { 1, 0, 3, 2 }, //replace b with f
54 { 0, 2, 3, 1 } //big permutation
56 std::vector<format> input_formats = { format::bfyx, format::yxfb };
58 auto get_permutation = [&](const std::vector<int32_t>& inp1, const std::vector<uint16_t>& order)
60 EXPECT_EQ(inp1.size(), order.size());
61 std::vector<int32_t> output;
62 for (auto const& o : order)
64 output.push_back(inp1.at(o));
69 for (auto const& fr : input_formats)
71 for (auto const& inp_t : input_tensors)
73 for (auto const& perm : permutations)
76 auto input = memory::allocate(engine, { data_types::f32, fr, tensor(inp_t) });
78 input_layout("input", input.get_layout()),
79 permute("permute", "input", perm));
81 network network(engine, topology);
82 network.set_input_data("input", input);
83 auto outputs = network.execute();
84 auto output = outputs.at("permute");
85 auto output_mem = output.get_memory();
86 EXPECT_EQ(outputs.size(), size_t(1));
87 auto ref_tensor = get_permutation(inp_t, perm);
88 auto out_tensor = output_mem.get_layout().size;
89 EXPECT_EQ(out_tensor.batch[0], ref_tensor[0]);
90 EXPECT_EQ(out_tensor.feature[0], ref_tensor[1]);
91 EXPECT_EQ(out_tensor.spatial[0], ref_tensor[2]);
92 EXPECT_EQ(out_tensor.spatial[1], ref_tensor[3]);
98 TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
100 // Input : bfyx:2x2x3x2
101 // Permute order : { 0,1,3,2 }
104 // f0: b0: 1 2 -15 b1: 0 0 -15
105 // f0: b0: 3 4 -15 b1: 0.5 -0.5 -15
106 // f1: b0: 5 6 -15 b1: 1.5 5.2 -15
107 // f1: b0: 7 8 -15 b1: 12 8 -15
112 const auto& engine = get_test_engine();
114 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
116 std::vector<float> values =
131 set_values(input, values);
134 input_layout("input", input.get_layout()),
135 permute("permute", "input", { 0, 1, 2, 3 }));
137 network network(engine, topology);
138 network.set_input_data("input", input);
140 auto outputs = network.execute();
141 EXPECT_EQ(outputs.size(), size_t(1));
142 EXPECT_EQ(outputs.begin()->first, "permute");
144 auto output = outputs.begin()->second.get_memory();
147 auto output_ptr = output.pointer<float>();
148 for (int i = 0; i < 24; i++)
150 EXPECT_FLOAT_EQ(values[i], output_ptr[i]);
155 TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
157 // Input : bfyx:2x2x3x2
158 // Permute order : { 0,1,3,2 }
161 // f0: b0: 1 2 -15 b1: 0 0 -15
162 // f0: b0: 3 4 -15 b1: 0.5 -0.5 -15
163 // f1: b0: 5 6 -15 b1: 1.5 5.2 -15
164 // f1: b0: 7 8 -15 b1: 12 8 -15
167 // f0: b0: 1 3 b1: 0 0.5
168 // f0: b0: 2 4 b1: 0 -0.5
169 // f0: b0: -15 -15 b1: -15 -15
170 // f1: b0: 5 7 b1: 1.5 12
171 // f1: b0: 6 8 b1: 5.2 8
172 // f1: b0: -15 -15 b1: -15 -15
175 const auto& engine = get_test_engine();
177 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
194 input_layout("input", input.get_layout()),
195 permute("permute", "input", { 0, 1, 3, 2 }));
197 network network(engine, topology);
198 network.set_input_data("input", input);
200 auto outputs = network.execute();
201 EXPECT_EQ(outputs.size(), size_t(1));
202 EXPECT_EQ(outputs.begin()->first, "permute");
204 auto output = outputs.begin()->second.get_memory();
206 float answers[24] = {
224 auto output_ptr = output.pointer<float>();
225 for (int i = 0; i < 24; i++)
227 EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
232 TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3)
234 const auto& engine = get_test_engine();
236 auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 100, 64, 1 } });
238 tests::set_random_values<float>(input_mem);
241 input_layout("input", input_mem.get_layout()),
242 permute("permute", "input", { 1, 0, 2, 3 }));
244 network network(engine, topology);
245 network.set_input_data("input", input_mem);
247 auto outputs = network.execute();
248 EXPECT_EQ(outputs.size(), size_t(1));
249 EXPECT_EQ(outputs.begin()->first, "permute");
251 auto output = outputs.begin()->second.get_memory();
253 auto output_ptr = output.pointer<float>();
254 auto input_ptr = input_mem.pointer<float>();
255 for (int i = 0; i < 6400; i++)
257 EXPECT_FLOAT_EQ(input_ptr[i], output_ptr[i]);
262 TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
264 // Input : bfyx:2x2x3x2
265 // Permute order : { 0,1,3,2 }
266 // Input padding : 2x1
269 // f0: b0: 1 2 -15 b1: 0 0 -15
270 // f0: b0: 3 4 -15 b1: 0.5 -0.5 -15
271 // f1: b0: 5 6 -15 b1: 1.5 5.2 -15
272 // f1: b0: 7 8 -15 b1: 12 8 -15
275 // f0: b0: 1 3 b1: 0 0.5
276 // f0: b0: 2 4 b1: 0 -0.5
277 // f0: b0: -15 -15 b1: -15 -15
278 // f1: b0: 5 7 b1: 1.5 12
279 // f1: b0: 6 8 b1: 5.2 8
280 // f1: b0: -15 -15 b1: -15 -15
283 const auto& engine = get_test_engine();
285 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
302 input_layout("input", input.get_layout()),
303 reorder("reorder", "input", input.get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })),
304 permute("permute", "reorder", { 0, 1, 3, 2 }));
306 network network(engine, topology);
307 network.set_input_data("input", input);
309 auto outputs = network.execute();
310 EXPECT_EQ(outputs.size(), size_t(1));
311 EXPECT_EQ(outputs.begin()->first, "permute");
313 auto output = outputs.begin()->second.get_memory();
315 float answers[24] = {
333 auto output_ptr = output.pointer<float>();
334 for (int i = 0; i < 24; i++)
336 EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
341 TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature)
343 // Input : yxfb:8x2x1x1
344 // Permute order : { 1, 0, 2, 3 }
345 // Output : yxfb:2x8x1x1
347 const auto& engine = get_test_engine();
349 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 8, 2, 1, 1 } });
353 1.f, 0.f, 5.f, 1.5f, 2.f, 0.f, 6.f, 5.2f,
356 3.f, 0.5f, 7.f, 12.f, 4.f, -0.5f, 8.f, 8.f
360 input_layout("input", input.get_layout()),
361 permute("permute", "input", { 1, 0, 2, 3 }));
363 network network(engine, topology);
364 network.set_input_data("input", input);
366 auto outputs = network.execute();
367 EXPECT_EQ(outputs.size(), size_t(1));
368 EXPECT_EQ(outputs.begin()->first, "permute");
370 auto output = outputs.begin()->second.get_memory();
371 auto out_tensor = output.get_layout().size;
372 EXPECT_EQ(out_tensor.batch[0], 2);
373 EXPECT_EQ(out_tensor.feature[0], 8);
374 EXPECT_EQ(out_tensor.spatial[0], 1);
375 EXPECT_EQ(out_tensor.spatial[1], 1);
377 float answers[16] = {
388 auto output_ptr = output.pointer<float>();
389 for (int i = 0; i < 16; i++)
391 EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
396 TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature)
398 // Input : yxfb:8x2x1x1
399 // Permute order : { 1, 0, 2, 3 }
400 // Output : yxfb:2x8x1x1
402 const auto& engine = get_test_engine();
404 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 8, 1, 1 } });
408 1.f, 0.f, 5.f, 1.5f, 2.f, 0.f, 6.f, 5.2f,
411 3.f, 0.5f, 7.f, 12.f, 4.f, -0.5f, 8.f, 8.f
415 input_layout("input", input.get_layout()),
416 permute("permute", "input", { 1, 0, 2, 3 }));
418 network network(engine, topology);
419 network.set_input_data("input", input);
421 auto outputs = network.execute();
422 EXPECT_EQ(outputs.size(), size_t(1));
423 EXPECT_EQ(outputs.begin()->first, "permute");
425 auto output = outputs.begin()->second.get_memory();
426 auto out_tensor = output.get_layout().size;
427 EXPECT_EQ(out_tensor.batch[0], 8);
428 EXPECT_EQ(out_tensor.feature[0], 2);
429 EXPECT_EQ(out_tensor.spatial[0], 1);
430 EXPECT_EQ(out_tensor.spatial[1], 1);
432 float answers[16] = {
443 auto output_ptr = output.pointer<float>();
444 for (int i = 0; i < 16; i++)
446 EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
451 template<data_types DType>
452 void permute_test_with_reorder()
454 const auto& engine = get_test_engine();
456 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
473 input_layout("input", input.get_layout()),
474 reorder("reorder", "input", { DType, format::bfyx,{ 2, 2, 3, 2 } }),
475 permute("permute", "reorder", { 0, 1, 3, 2 }),
476 reorder("reorder_out", "permute", { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }));
478 network network(engine, topology);
479 network.set_input_data("input", input);
481 auto outputs = network.execute();
482 ASSERT_EQ(outputs.size(), size_t(1));
483 EXPECT_EQ(outputs.begin()->first, "reorder_out");
485 auto output = outputs.begin()->second.get_memory();
487 float answers[24] = {
505 auto output_ptr = output.pointer<float>();
506 for (int i = 0; i < 24; i++)
508 EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
512 TEST(permute_gpu_i8, basic_bfyx_permute_0_1_3_2) {
513 permute_test_with_reorder<data_types::i8>();
516 TEST(permute_gpu_i32, basic_bfyx_permute_0_1_3_2) {
517 permute_test_with_reorder<data_types::i32>();
520 TEST(permute_gpu_i64, basic_bfyx_permute_0_1_3_2) {
521 permute_test_with_reorder<data_types::i64>();
524 TEST(fc_permute_crop_gpu, basic_permute_yxfb)
526 const auto& engine = get_test_engine();
528 auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 512 } });
530 //Topolgy creates permute which "repalces" the batch with the feature.
532 input_layout("input", input_mem.get_layout()), // yxfb {1, 5, 1, 512 }}
533 permute("permute", "input", { 1, 0, 2, 3 }) // yxfb {5, 1, 1, 512} --- without permute fix yxfb {1, 5, 512, 1}
536 network network(engine, topology);
537 network.set_input_data("input", input_mem);
539 auto outputs = network.execute();
540 EXPECT_EQ(outputs.size(), size_t(1));
541 EXPECT_EQ(outputs.begin()->first, "permute");
543 auto output = outputs.begin()->second.get_memory();
544 auto out_tensor = output.get_layout().size;
545 EXPECT_EQ(out_tensor.batch[0], 5);
546 EXPECT_EQ(out_tensor.feature[0], 1);
547 EXPECT_EQ(out_tensor.spatial[0], 1);
548 EXPECT_EQ(out_tensor.spatial[1], 512);
549 EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb);
552 TEST(fc_permute_crop_gpu, basic_0)
555 const auto& engine = get_test_engine();
557 auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 11264, 1, 1 } });
558 auto weights_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 512, 11264, 1, 1 } });
559 auto bias_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 512, 1 } });
562 input_layout("input", input_mem.get_layout()), // bfyx {5, 11264, 1, 1}}
563 data("weights", weights_mem),
564 data("bias", bias_mem),
565 fully_connected("fully_connected", "input", "weights", "bias"), // yxfb {5, 512, 1, 1}
566 reshape("reshape", "fully_connected", { 1, 5, 1, 512 }), // yxfb {1, 5, 1, 512}
567 permute("permute", "reshape", { 1, 0, 2, 3 }), // yxfb {5, 1, 1, 512} --- without permute fix yxfb {1, 5, 512, 1}
568 crop("crop", "permute", { 1, 1, 1, 512 }, { 4, 0, 0 ,0 }) // without permute fix it will fail "Tensor pitches didn't set correctly"
571 network network(engine, topology);
572 network.set_input_data("input", input_mem);
574 auto outputs = network.execute();
575 EXPECT_EQ(outputs.size(), size_t(1));
576 EXPECT_EQ(outputs.begin()->first, "crop");
578 auto output = outputs.begin()->second.get_memory();
579 auto out_tensor = output.get_layout().size;
580 EXPECT_EQ(out_tensor.batch[0], 1);
581 EXPECT_EQ(out_tensor.feature[0], 1);
582 EXPECT_EQ(out_tensor.spatial[0], 1);
583 EXPECT_EQ(out_tensor.spatial[1], 512);
584 EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb);
587 TEST(fc_permute_gpu, basic_permute_bfyx)
589 const auto& engine = get_test_engine();
591 auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 5, 1, 256 } });
593 tests::set_random_values<float>(input_mem);
595 //Topolgy creates permute which "repalces" the batch with the feature.
597 input_layout("input", input_mem.get_layout()),
598 permute("permute", "input", { 1, 0, 2, 3 })
601 network network(engine, topology);
602 network.set_input_data("input", input_mem);
604 auto outputs = network.execute();
605 EXPECT_EQ(outputs.size(), size_t(1));
606 EXPECT_EQ(outputs.begin()->first, "permute");
608 auto output = outputs.begin()->second.get_memory();
609 auto out_tensor = output.get_layout().size;
610 EXPECT_EQ(out_tensor.batch[0], 5);
611 EXPECT_EQ(out_tensor.feature[0], 1);
612 EXPECT_EQ(out_tensor.spatial[0], 1);
613 EXPECT_EQ(out_tensor.spatial[1], 256);
614 EXPECT_EQ(output.get_layout().format, cldnn::format::bfyx);
616 auto input_ptr = input_mem.pointer<float>();
617 auto output_ptr = output.pointer<float>();
618 for (int i = 0; i < 5 * 256; i++)
619 EXPECT_NEAR(input_ptr[i], output_ptr[i], 1e-3f);