1 // Copyright (c) 2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
20 #include <api/CPP/input_layout.hpp>
21 #include <api/CPP/memory.hpp>
22 #include <api/CPP/reverse_sequence.hpp>
23 #include <api/CPP/topology.hpp>
24 #include <api/CPP/network.hpp>
27 #include <tests/test_utils/test_utils.h>
29 using namespace cldnn;
30 using namespace ::tests;
// Runs reverse_sequence on an f32 bfyx input (tensor { 2, 2, 1, 1 }) with batch_axis = 1
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
32 TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
// Both buffers are allocated through `engine`; seq_lengths is an f32 buffer with tensor { 2, 1, 1, 1 }.
35 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
36 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
37 size_t batch_axis = 1;
// Presumably the values written into `input` (a 0..3 ramp) -- confirm against the enclosing set_values call.
41 0.0f, 1.0f, 2.0f, 3.0f
44 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
49 topology.add(input_layout("input", input.get_layout()));
50 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
52 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
55 network network(engine, topology);
57 network.set_input_data("input", input);
58 network.set_input_data("seq_lengths", seq_lengths);
60 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
62 auto output = outputs.at("reverse_sequence").get_memory();
63 auto output_ptr = output.pointer<float>();
// Expected output: relative to the 0..3 ramp, elements 1 and 3 are exchanged.
65 std::vector<float> expected_results = {
66 0.0f, 3.0f, 2.0f, 1.0f
// Exact equality is used for the comparison.
69 for (size_t i = 0; i < expected_results.size(); ++i) {
70 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs reverse_sequence on an f32 bfyx input (tensor { 3, 3, 1, 3 }, 27 values) with batch_axis = 0
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa1" suffix presumably means seq_axis = 1 -- confirm at the seq_axis declaration.
74 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
// Both buffers are allocated through `engine`; seq_lengths is an f32 buffer with tensor { 3, 1, 1, 1 }.
77 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
78 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
79 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..26 ramp) -- confirm against the enclosing set_values call.
83 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
84 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
85 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
88 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
93 topology.add(input_layout("input", input.get_layout()));
94 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
96 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
99 network network(engine, topology);
101 network.set_input_data("input", input);
102 network.set_input_data("seq_lengths", seq_lengths);
104 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
106 auto output = outputs.at("reverse_sequence").get_memory();
107 auto output_ptr = output.pointer<float>();
// Expected output: within each run of nine values the first two triples are exchanged,
// the third triple is unchanged (relative to the 0..26 ramp).
109 std::vector<float> expected_results = {
110 3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
111 12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
112 21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
// Exact equality is used for the comparison.
115 for (size_t i = 0; i < expected_results.size(); ++i) {
116 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs reverse_sequence on an f32 bfyx input (tensor { 3, 3, 1, 3 }, 27 values) with batch_axis = 2
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
120 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
// Both buffers are allocated through `engine`; seq_lengths is an f32 buffer with tensor { 3, 1, 1, 1 }.
123 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
124 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
125 size_t batch_axis = 2;
// Presumably the values written into `input` (a 0..26 ramp) -- confirm against the enclosing set_values call.
129 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
130 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
131 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
134 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
139 topology.add(input_layout("input", input.get_layout()));
140 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
142 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
145 network network(engine, topology);
147 network.set_input_data("input", input);
148 network.set_input_data("seq_lengths", seq_lengths);
150 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
152 auto output = outputs.at("reverse_sequence").get_memory();
153 auto output_ptr = output.pointer<float>();
// Expected output: the first two runs of nine values are exchanged, the last run of nine
// is unchanged (relative to the 0..26 ramp).
155 std::vector<float> expected_results = {
156 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
157 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
158 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
// Exact equality is used for the comparison.
161 for (size_t i = 0; i < expected_results.size(); ++i) {
162 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs reverse_sequence on an f32 bfyx input (tensor { 2, 2, 2, 3 }, 24 values) with batch_axis = 0
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa3" suffix presumably means seq_axis = 3 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
166 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) {
// Both buffers are allocated through `engine`; seq_lengths is an f32 buffer with tensor { 2, 1, 1, 1 }.
169 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
170 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
171 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..23 ramp) -- confirm against the enclosing set_values call.
175 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
176 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
177 20.0f, 21.0f, 22.0f, 23.0f
180 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
185 topology.add(input_layout("input", input.get_layout()));
186 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
188 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
191 network network(engine, topology);
193 network.set_input_data("input", input);
194 network.set_input_data("seq_lengths", seq_lengths);
196 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
198 auto output = outputs.at("reverse_sequence").get_memory();
199 auto output_ptr = output.pointer<float>();
// Expected output: values 0..11 are unchanged; values 12..23 are exchanged in adjacent pairs
// (relative to the 0..23 ramp).
201 std::vector<float> expected_results = {
202 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
203 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
204 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f,
205 19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
// Exact equality is used for the comparison.
208 for (size_t i = 0; i < expected_results.size(); ++i) {
209 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs reverse_sequence on an f32 bfyx input (tensor { 2, 2, 2, 3 }, 24 values) with batch_axis = 0
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa2" suffix presumably means seq_axis = 2 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
213 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) {
// Both buffers are allocated through `engine`; seq_lengths is an f32 buffer with tensor { 2, 1, 1, 1 }.
216 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
217 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
218 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..23 ramp) -- confirm against the enclosing set_values call.
222 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
223 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
224 20.0f, 21.0f, 22.0f, 23.0f
227 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
232 topology.add(input_layout("input", input.get_layout()));
233 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
235 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
238 network network(engine, topology);
240 network.set_input_data("input", input);
241 network.set_input_data("seq_lengths", seq_lengths);
243 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
245 auto output = outputs.at("reverse_sequence").get_memory();
246 auto output_ptr = output.pointer<float>();
// Expected output: within each run of six values the first two pairs are exchanged,
// the third pair is unchanged (relative to the 0..23 ramp).
248 std::vector<float> expected_results = {
249 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f,
250 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f,
251 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f,
252 20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
// Exact equality is used for the comparison.
255 for (size_t i = 0; i < expected_results.size(); ++i) {
256 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs reverse_sequence on an f32 bfyx input (tensor { 2, 2, 2, 3 }, 24 values) with batch_axis = 2
// and compares the output element by element with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
260 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) {
// Both buffers are allocated through `engine`. Unlike the batch_axis = 0 variants of this shape,
// seq_lengths here has tensor { 3, 1, 1, 1 } (three entries).
263 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
264 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
265 size_t batch_axis = 2;
// Presumably the values written into `input` (a 0..23 ramp) -- confirm against the enclosing set_values call.
269 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
270 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
271 20.0f, 21.0f, 22.0f, 23.0f
274 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
279 topology.add(input_layout("input", input.get_layout()));
280 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
282 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
285 network network(engine, topology);
287 network.set_input_data("input", input);
288 network.set_input_data("seq_lengths", seq_lengths);
290 auto outputs = network.execute();
// Look up the primitive's output by id and access it as float.
292 auto output = outputs.at("reverse_sequence").get_memory();
293 auto output_ptr = output.pointer<float>();
// Expected output: the first four values of every run of six are unchanged; the last pair of each
// run is exchanged with the pair twelve positions away (relative to the 0..23 ramp).
295 std::vector<float> expected_results = {
296 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f,
297 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f,
298 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f,
299 18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
// Exact equality is used for the comparison.
302 for (size_t i = 0; i < expected_results.size(); ++i) {
303 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 2, 2, 1, 1 }) with
// batch_axis = 1 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
307 TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) {
// Only `input` is f16; seq_lengths stays an f32 buffer with tensor { 2, 1, 1, 1 }.
310 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } });
311 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
312 size_t batch_axis = 1;
// Presumably the values written into `input` (a 0..3 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
316 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f)
319 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
324 topology.add(input_layout("input", input.get_layout()));
325 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
327 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
330 network network(engine, topology);
332 network.set_input_data("input", input);
333 network.set_input_data("seq_lengths", seq_lengths);
335 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
337 auto output = outputs.at("reverse_sequence").get_memory();
338 auto output_ptr = output.pointer<uint16_t>();
// Expected output as floats: relative to the 0..3 ramp, elements 1 and 3 are exchanged.
340 std::vector<float> expected_results = {
341 0.0f, 3.0f, 2.0f, 1.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
344 for (size_t i = 0; i < expected_results.size(); ++i) {
345 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 3, 3, 1, 3 }, 27 values)
// with batch_axis = 0 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa1" suffix presumably means seq_axis = 1 -- confirm at the seq_axis declaration.
349 TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) {
// Only `input` is f16; seq_lengths stays an f32 buffer with tensor { 3, 1, 1, 1 }.
352 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
353 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
354 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..26 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
358 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
359 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
360 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
363 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
368 topology.add(input_layout("input", input.get_layout()));
369 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
371 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
374 network network(engine, topology);
376 network.set_input_data("input", input);
377 network.set_input_data("seq_lengths", seq_lengths);
379 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
381 auto output = outputs.at("reverse_sequence").get_memory();
382 auto output_ptr = output.pointer<uint16_t >();
// Expected output as floats: within each run of nine values the first two triples are exchanged,
// the third triple is unchanged (relative to the 0..26 ramp).
384 std::vector<float> expected_results = {
385 3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
386 12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
387 21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
390 for (size_t i = 0; i < expected_results.size(); ++i) {
391 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 3, 3, 1, 3 }, 27 values)
// with batch_axis = 2 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
395 TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) {
// Only `input` is f16; seq_lengths stays an f32 buffer with tensor { 3, 1, 1, 1 }.
398 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
399 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
400 size_t batch_axis = 2;
// Presumably the values written into `input` (a 0..26 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
404 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
405 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
406 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
409 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
414 topology.add(input_layout("input", input.get_layout()));
415 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
417 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
420 network network(engine, topology);
422 network.set_input_data("input", input);
423 network.set_input_data("seq_lengths", seq_lengths);
425 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
427 auto output = outputs.at("reverse_sequence").get_memory();
428 auto output_ptr = output.pointer<uint16_t>();
// Expected output as floats: the first two runs of nine values are exchanged, the last run of nine
// is unchanged (relative to the 0..26 ramp).
430 std::vector<float> expected_results = {
431 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
432 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
433 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
436 for (size_t i = 0; i < expected_results.size(); ++i) {
437 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 2, 2, 2, 3 }, 24 values)
// with batch_axis = 0 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa3" suffix presumably means seq_axis = 3 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
441 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) {
// Only `input` is f16; seq_lengths stays an f32 buffer with tensor { 2, 1, 1, 1 }.
444 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
445 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
446 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..23 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
450 FLOAT16(0.0f), FLOAT16( 1.0f), FLOAT16( 2.0f), FLOAT16( 3.0f), FLOAT16( 4.0f), FLOAT16( 5.0f), FLOAT16( 6.0f), FLOAT16( 7.0f), FLOAT16( 8.0f), FLOAT16( 9.0f),
451 FLOAT16(10.0f), FLOAT16( 11.0f), FLOAT16( 12.0f), FLOAT16( 13.0f), FLOAT16( 14.0f), FLOAT16( 15.0f), FLOAT16( 16.0f), FLOAT16( 17.0f), FLOAT16( 18.0f), FLOAT16( 19.0f),
452 FLOAT16(20.0f), FLOAT16( 21.0f), FLOAT16( 22.0f), FLOAT16( 23.0f)
455 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
460 topology.add(input_layout("input", input.get_layout()));
461 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
463 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
466 network network(engine, topology);
468 network.set_input_data("input", input);
469 network.set_input_data("seq_lengths", seq_lengths);
471 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
473 auto output = outputs.at("reverse_sequence").get_memory();
474 auto output_ptr = output.pointer<uint16_t>();
// Expected output as floats: values 0..11 are unchanged; values 12..23 are exchanged in adjacent
// pairs (relative to the 0..23 ramp).
476 std::vector<float> expected_results = {
477 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
478 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
479 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f,
480 19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
483 for (size_t i = 0; i < expected_results.size(); ++i) {
484 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 2, 2, 2, 3 }, 24 values)
// with batch_axis = 0 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa2" suffix presumably means seq_axis = 2 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
488 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) {
// Only `input` is f16; seq_lengths stays an f32 buffer with tensor { 2, 1, 1, 1 }.
491 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
492 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
493 size_t batch_axis = 0;
// Presumably the values written into `input` (a 0..23 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
497 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
498 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
499 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
502 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
507 topology.add(input_layout("input", input.get_layout()));
508 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
510 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
513 network network(engine, topology);
515 network.set_input_data("input", input);
516 network.set_input_data("seq_lengths", seq_lengths);
518 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
520 auto output = outputs.at("reverse_sequence").get_memory();
521 auto output_ptr = output.pointer<uint16_t>();
// Expected output as floats: within each run of six values the first two pairs are exchanged,
// the third pair is unchanged (relative to the 0..23 ramp).
523 std::vector<float> expected_results = {
524 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f,
525 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f,
526 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f,
527 20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
530 for (size_t i = 0; i < expected_results.size(); ++i) {
531 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs reverse_sequence on an f16 bfyx input (tensor { 2, 2, 2, 3 }, 24 values)
// with batch_axis = 2 and compares the output, converted to float, with a fixed expectation.
// NOTE(review): the "sa0" suffix presumably means seq_axis = 0 -- confirm at the seq_axis declaration.
// NOTE(review): the name says d2_2_3_2 while the tensor literal is { 2, 2, 2, 3 }; presumably the
// tensor literal is not in the same dimension order as the name -- confirm.
535 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
// Only `input` is f16. Unlike the batch_axis = 0 variants of this shape, seq_lengths here is an
// f32 buffer with tensor { 3, 1, 1, 1 } (three entries).
538 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
539 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
540 size_t batch_axis = 2;
// Presumably the values written into `input` (a 0..23 ramp wrapped in FLOAT16) -- confirm against the
// enclosing set_values call.
544 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
545 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
546 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
549 set_values(seq_lengths, {
// Register both buffers as input layouts under the ids later used by set_input_data.
554 topology.add(input_layout("input", input.get_layout()));
555 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
// Primitive under test: reads "input" and "seq_lengths".
557 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
// Build the network, bind the two buffers and execute.
560 network network(engine, topology);
562 network.set_input_data("input", input);
563 network.set_input_data("seq_lengths", seq_lengths);
565 auto outputs = network.execute();
// Look up the primitive's output by id and access it as 16-bit words.
567 auto output = outputs.at("reverse_sequence").get_memory();
568 auto output_ptr = output.pointer<uint16_t>();
// Expected output as floats: the first four values of every run of six are unchanged; the last pair
// of each run is exchanged with the pair twelve positions away (relative to the 0..23 ramp).
570 std::vector<float> expected_results = {
571 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f,
572 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f,
573 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f,
574 18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
// Each 16-bit output word goes through float16_to_float32 before the exact-equality comparison.
577 for (size_t i = 0; i < expected_results.size(); ++i) {
578 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));