1 // Copyright (c) 2019 Intel Corporation
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 ///////////////////////////////////////////////////////////////////////////////////////////////////
16 #include <gtest/gtest.h>
18 #include <api/input_layout.hpp>
19 #include <api/memory.hpp>
20 #include <api/reverse_sequence.hpp>
21 #include <api/topology.hpp>
22 #include <api/network.hpp>
25 #include <tests/test_utils/test_utils.h>
27 using namespace cldnn;
28 using namespace ::tests;
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 2, 2, 1, 1 }
// with batch_axis = 1 and compares the network output element by element with a
// reference vector.
// NOTE(review): the seq_axis declaration and the values written to seq_lengths are not
// visible in this block; the "sa0" suffix of the test name suggests seq_axis = 0 - confirm.
30 TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
33 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
34 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
35 int32_t batch_axis = 1;
// Presumably the values stored into "input" - TODO confirm against the enclosing call.
39 0.0f, 1.0f, 2.0f, 3.0f
42 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
47 topology.add(input_layout("input", input.get_layout()));
48 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
50 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
53 network network(engine, topology);
55 network.set_input_data("input", input);
56 network.set_input_data("seq_lengths", seq_lengths);
58 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
60 auto output = outputs.at("reverse_sequence").get_memory();
61 auto output_ptr = output.pointer<float>();
// Reference output: the values 0..3 with the elements at positions 1 and 3 exchanged.
63 std::vector<float> expected_results = {
64 0.0f, 3.0f, 2.0f, 1.0f
// Every element is checked with exact equality.
67 for (size_t i = 0; i < expected_results.size(); ++i) {
68 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 3, 3, 1, 3 }
// with batch_axis = 0 and compares the network output element by element with a
// reference vector.
// NOTE(review): the seq_axis declaration and the values written to seq_lengths are not
// visible in this block; the "sa1" suffix of the test name suggests seq_axis = 1 - confirm.
72 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
75 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
76 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
77 int32_t batch_axis = 0;
// Ramp 0..26; presumably the values stored into "input" - TODO confirm against the enclosing call.
81 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
82 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
83 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
86 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
91 topology.add(input_layout("input", input.get_layout()));
92 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
94 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
97 network network(engine, topology);
99 network.set_input_data("input", input);
100 network.set_input_data("seq_lengths", seq_lengths);
102 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
104 auto output = outputs.at("reverse_sequence").get_memory();
105 auto output_ptr = output.pointer<float>();
// Reference output: in every run of nine values of the 0..26 ramp the first two triples
// are exchanged and the third triple is unchanged.
107 std::vector<float> expected_results = {
108 3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
109 12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
110 21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
// Every element is checked with exact equality.
113 for (size_t i = 0; i < expected_results.size(); ++i) {
114 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 3, 3, 1, 3 }
// with batch_axis = 2 and seq_axis = 0, then compares the network output element by
// element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
118 TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
121 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
122 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
123 int32_t batch_axis = 2;
124 int32_t seq_axis = 0;
// Ramp 0..26; presumably the values stored into "input" - TODO confirm against the enclosing call.
127 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
128 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
129 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
132 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
137 topology.add(input_layout("input", input.get_layout()));
138 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
140 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
143 network network(engine, topology);
145 network.set_input_data("input", input);
146 network.set_input_data("seq_lengths", seq_lengths);
148 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
150 auto output = outputs.at("reverse_sequence").get_memory();
151 auto output_ptr = output.pointer<float>();
// Reference output: the first two runs of nine values of the 0..26 ramp are exchanged
// (9..17 precedes 0..8) and the last run 18..26 is unchanged.
153 std::vector<float> expected_results = {
154 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
155 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
156 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
// Every element is checked with exact equality.
159 for (size_t i = 0; i < expected_results.size(); ++i) {
160 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 2, 2, 2, 3 }
// with batch_axis = 0 and seq_axis = 3, then compares the network output element by
// element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
164 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
167 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
168 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
169 int32_t batch_axis = 0;
170 int32_t seq_axis = 3;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
173 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
174 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
175 20.0f, 21.0f, 22.0f, 23.0f
178 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
183 topology.add(input_layout("input", input.get_layout()));
184 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
186 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
189 network network(engine, topology);
191 network.set_input_data("input", input);
192 network.set_input_data("seq_lengths", seq_lengths);
194 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
196 auto output = outputs.at("reverse_sequence").get_memory();
197 auto output_ptr = output.pointer<float>();
// Reference output: the first twelve values of the 0..23 ramp are unchanged; in the
// last twelve every adjacent pair is exchanged.
199 std::vector<float> expected_results = {
200 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
201 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
202 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f,
203 19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
// Every element is checked with exact equality.
206 for (size_t i = 0; i < expected_results.size(); ++i) {
207 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 2, 2, 2, 3 }
// with batch_axis = 0 and seq_axis = 2, then compares the network output element by
// element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
211 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
214 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
215 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
216 int32_t batch_axis = 0;
217 int32_t seq_axis = 2;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
220 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
221 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
222 20.0f, 21.0f, 22.0f, 23.0f
225 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
230 topology.add(input_layout("input", input.get_layout()));
231 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
233 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
236 network network(engine, topology);
238 network.set_input_data("input", input);
239 network.set_input_data("seq_lengths", seq_lengths);
241 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
243 auto output = outputs.at("reverse_sequence").get_memory();
244 auto output_ptr = output.pointer<float>();
// Reference output: in every run of six values of the 0..23 ramp the first two pairs
// are exchanged and the third pair is unchanged.
246 std::vector<float> expected_results = {
247 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f,
248 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f,
249 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f,
250 20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
// Every element is checked with exact equality.
253 for (size_t i = 0; i < expected_results.size(); ++i) {
254 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Runs the reverse_sequence primitive on an f32 bfyx input allocated as { 2, 2, 2, 3 }
// with batch_axis = 2 and seq_axis = 0, then compares the network output element by
// element with a reference vector. Here seq_lengths is allocated as { 3, 1, 1, 1 }.
// NOTE(review): the values written to seq_lengths are not visible in this block.
258 TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) {
// Memory objects for the data tensor and for the tensor named seq_lengths (both f32, bfyx).
261 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
262 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
263 int32_t batch_axis = 2;
264 int32_t seq_axis = 0;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
267 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
268 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
269 20.0f, 21.0f, 22.0f, 23.0f
272 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
277 topology.add(input_layout("input", input.get_layout()));
278 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
280 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
283 network network(engine, topology);
285 network.set_input_data("input", input);
286 network.set_input_data("seq_lengths", seq_lengths);
288 auto outputs = network.execute();
// Look up the result under the primitive id and access it as float.
290 auto output = outputs.at("reverse_sequence").get_memory();
291 auto output_ptr = output.pointer<float>();
// Reference output: relative to the 0..23 ramp, the pair 4,5 trades places with 16,17
// and the pair 10,11 trades places with 22,23; all other values keep their position.
293 std::vector<float> expected_results = {
294 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f,
295 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f,
296 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f,
297 18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
// Every element is checked with exact equality.
300 for (size_t i = 0; i < expected_results.size(); ++i) {
301 EXPECT_EQ(expected_results[i], output_ptr[i]);
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 2, 2, 1, 1 } with batch_axis = 1 and seq_axis = 0, then compares the
// network output (converted to float) element by element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
305 TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
308 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } });
309 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
310 int32_t batch_axis = 1;
311 int32_t seq_axis = 0;
// Presumably the values stored into "input" - TODO confirm against the enclosing call.
314 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f)
317 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
322 topology.add(input_layout("input", input.get_layout()));
323 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
325 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
328 network network(engine, topology);
330 network.set_input_data("input", input);
331 network.set_input_data("seq_lengths", seq_lengths);
333 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
335 auto output = outputs.at("reverse_sequence").get_memory();
336 auto output_ptr = output.pointer<uint16_t>();
// Reference output: the values 0..3 with the elements at positions 1 and 3 exchanged.
338 std::vector<float> expected_results = {
339 0.0f, 3.0f, 2.0f, 1.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
342 for (size_t i = 0; i < expected_results.size(); ++i) {
343 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 3, 3, 1, 3 } with batch_axis = 0 and seq_axis = 1, then compares the
// network output (converted to float) element by element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
347 TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
350 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
351 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
352 int32_t batch_axis = 0;
353 int32_t seq_axis = 1;
// Ramp 0..26; presumably the values stored into "input" - TODO confirm against the enclosing call.
356 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
357 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
358 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
361 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
366 topology.add(input_layout("input", input.get_layout()));
367 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
369 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
372 network network(engine, topology);
374 network.set_input_data("input", input);
375 network.set_input_data("seq_lengths", seq_lengths);
377 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
379 auto output = outputs.at("reverse_sequence").get_memory();
380 auto output_ptr = output.pointer<uint16_t >();
// Reference output: in every run of nine values of the 0..26 ramp the first two triples
// are exchanged and the third triple is unchanged.
382 std::vector<float> expected_results = {
383 3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
384 12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
385 21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
388 for (size_t i = 0; i < expected_results.size(); ++i) {
389 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 3, 3, 1, 3 } with batch_axis = 2 and seq_axis = 0, then compares the
// network output (converted to float) element by element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
393 TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
396 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
397 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
398 int32_t batch_axis = 2;
399 int32_t seq_axis = 0;
// Ramp 0..26; presumably the values stored into "input" - TODO confirm against the enclosing call.
402 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
403 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
404 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
407 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
412 topology.add(input_layout("input", input.get_layout()));
413 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
415 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
418 network network(engine, topology);
420 network.set_input_data("input", input);
421 network.set_input_data("seq_lengths", seq_lengths);
423 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
425 auto output = outputs.at("reverse_sequence").get_memory();
426 auto output_ptr = output.pointer<uint16_t>();
// Reference output: the first two runs of nine values of the 0..26 ramp are exchanged
// (9..17 precedes 0..8) and the last run 18..26 is unchanged.
428 std::vector<float> expected_results = {
429 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
430 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
431 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
434 for (size_t i = 0; i < expected_results.size(); ++i) {
435 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 2, 2, 2, 3 } with batch_axis = 0 and seq_axis = 3, then compares the
// network output (converted to float) element by element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
439 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
442 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
443 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
444 int32_t batch_axis = 0;
445 int32_t seq_axis = 3;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
448 FLOAT16(0.0f), FLOAT16( 1.0f), FLOAT16( 2.0f), FLOAT16( 3.0f), FLOAT16( 4.0f), FLOAT16( 5.0f), FLOAT16( 6.0f), FLOAT16( 7.0f), FLOAT16( 8.0f), FLOAT16( 9.0f),
449 FLOAT16(10.0f), FLOAT16( 11.0f), FLOAT16( 12.0f), FLOAT16( 13.0f), FLOAT16( 14.0f), FLOAT16( 15.0f), FLOAT16( 16.0f), FLOAT16( 17.0f), FLOAT16( 18.0f), FLOAT16( 19.0f),
450 FLOAT16(20.0f), FLOAT16( 21.0f), FLOAT16( 22.0f), FLOAT16( 23.0f)
453 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
458 topology.add(input_layout("input", input.get_layout()));
459 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
461 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
464 network network(engine, topology);
466 network.set_input_data("input", input);
467 network.set_input_data("seq_lengths", seq_lengths);
469 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
471 auto output = outputs.at("reverse_sequence").get_memory();
472 auto output_ptr = output.pointer<uint16_t>();
// Reference output: the first twelve values of the 0..23 ramp are unchanged; in the
// last twelve every adjacent pair is exchanged.
474 std::vector<float> expected_results = {
475 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
476 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
477 13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f,
478 19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
481 for (size_t i = 0; i < expected_results.size(); ++i) {
482 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 2, 2, 2, 3 } with batch_axis = 0 and seq_axis = 2, then compares the
// network output (converted to float) element by element with a reference vector.
// NOTE(review): the values written to seq_lengths are not visible in this block.
486 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
489 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
490 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
491 int32_t batch_axis = 0;
492 int32_t seq_axis = 2;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
495 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
496 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
497 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
500 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
505 topology.add(input_layout("input", input.get_layout()));
506 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
508 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
511 network network(engine, topology);
513 network.set_input_data("input", input);
514 network.set_input_data("seq_lengths", seq_lengths);
516 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
518 auto output = outputs.at("reverse_sequence").get_memory();
519 auto output_ptr = output.pointer<uint16_t>();
// Reference output: in every run of six values of the 0..23 ramp the first two pairs
// are exchanged and the third pair is unchanged.
521 std::vector<float> expected_results = {
522 2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f,
523 8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f,
524 14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f,
525 20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
528 for (size_t i = 0; i < expected_results.size(); ++i) {
529 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
// Half-precision variant: runs the reverse_sequence primitive on an f16 bfyx input
// allocated as { 2, 2, 2, 3 } with batch_axis = 2 and seq_axis = 0, then compares the
// network output (converted to float) element by element with a reference vector.
// Here seq_lengths is allocated as { 3, 1, 1, 1 }.
// NOTE(review): the values written to seq_lengths are not visible in this block.
533 TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
// The data tensor is f16 while the tensor named seq_lengths stays f32.
536 auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
537 auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
538 int32_t batch_axis = 2;
539 int32_t seq_axis = 0;
// Ramp 0..23; presumably the values stored into "input" - TODO confirm against the enclosing call.
542 FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
543 FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
544 FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
547 set_values(seq_lengths, {
// Both memories are declared as network inputs; reverse_sequence refers to them by id.
552 topology.add(input_layout("input", input.get_layout()));
553 topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
555 reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
558 network network(engine, topology);
560 network.set_input_data("input", input);
561 network.set_input_data("seq_lengths", seq_lengths);
563 auto outputs = network.execute();
// Look up the result under the primitive id; it is accessed as raw uint16_t elements.
565 auto output = outputs.at("reverse_sequence").get_memory();
566 auto output_ptr = output.pointer<uint16_t>();
// Reference output: relative to the 0..23 ramp, the pair 4,5 trades places with 16,17
// and the pair 10,11 trades places with 22,23; all other values keep their position.
568 std::vector<float> expected_results = {
569 0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f,
570 6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f,
571 12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f,
572 18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
// Each uint16_t element is passed through float16_to_float32 before the exact comparison.
575 for (size_t i = 0; i < expected_results.size(); ++i) {
576 EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));