2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/eltwise.hpp"
22 #include "api/CPP/reorder.hpp"
23 #include "api/CPP/custom_gpu_primitive.hpp"
24 #include <api/CPP/engine.hpp>
25 #include <api/CPP/topology.hpp>
26 #include <api/CPP/network.hpp>
27 #include <api/CPP/engine.hpp>
28 #include "test_utils/test_utils.h"
// Maps the test helper type FLOAT16 onto cldnn's data_types::f16 so that
// templated test utilities can deduce the cldnn data type from the C++ type.
33 template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
36 using namespace cldnn;
37 using namespace tests;
39 TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) {
// Element-wise add of two 2x2x2x2 yxfb f32 tensors via a user-supplied
// OpenCL kernel registered through custom_gpu_primitive ("user_kernel").
// NOTE(review): this chunk appears line-sampled (original line numbers are
// embedded; string quotes, set_values openers and closing braces are
// missing) -- comments only, no code touched.
// Presumably first input values:
45 // f0: b0: 1 2 b1: 0 0
46 // f0: b0: 3 4 b1: 0.5 -0.5
47 // f1: b0: 5 6 b1: 1.5 5.2
48 // f1: b0: 7 8 b1: 12 8
// Presumably second input values:
51 // f0: b0: 0.5 5 b1: 2.5 7
52 // f0: b0: 15 -2 b1: 17 6.5
53 // f1: b0: 0.5 2 b1: 2.5 4
54 // f1: b0: 8 -0.5 b1: 10 -2.5
// Expected output (sum of the two inputs; e.g. 1 + 0.5 = 1.5):
57 // f0: b0: 1.5 7 b1: 2.5 7
58 // f0: b0: 18 2 b1: 17.5 6
59 // f1: b0: 5.5 8 b1: 4 9.2
60 // f1: b0: 15 16.5 b1: 22 16.5
// Allocate the two f32 input buffers on the shared test engine.
63 const auto& engine = get_test_engine();
65 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
66 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
// OpenCL C source for the user kernel: one work-item per element,
// output[idx] = input0[idx] + input1[idx].
// NOTE(review): the string-literal quoting around the kernel body was lost
// by the sampling; do not edit these lines in place.
68 std::string kernel_code =
70 __kernel void add_kernel(const __global float* input0, const __global float* input1, __global float* output)
72 const unsigned idx = get_global_id(0);
73 output[idx] = input0[idx] + input1[idx];
// Kernel launch description: entry point, argument binding (two inputs,
// one output), output layout, and a 1-D global work size covering all
// output elements.
76 std::string entry_point = "add_kernel";
77 std::vector<cldnn_arg> parameters = { {arg_input, 0}, {arg_input, 1 }, {arg_output, 0 } };
78 layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
79 std::vector<size_t> gws = { output_layout.count() };
// Build the topology: two input_layouts feeding the custom primitive.
// NOTE(review): the `topology topology;` declaration was sampled out.
81 topology.add(input_layout("input", input.get_layout()));
82 topology.add(input_layout("input2", input2.get_layout()));
83 topology.add(custom_gpu_primitive(
85 { "input", "input2" },
// Remaining visible fragment of the set_values(...) calls that populate
// the inputs (the openers were sampled out).
101 0.5f, 2.5f, 0.5f, 2.5f,
103 15.f, 17.f, 8.f, 10.f,
104 -2.f, 6.5f, -0.5f, -2.5f });
// Build and run the network with both inputs bound.
106 network network(engine, topology);
108 network.set_input_data("input", input);
109 network.set_input_data("input2", input2);
110 auto outputs = network.execute();
// Verify: exactly one output, produced by "user_kernel", matching the
// element-wise sums tabulated above (yxfb linear order).
112 EXPECT_EQ(outputs.size(), size_t(1));
113 EXPECT_EQ(outputs.begin()->first, "user_kernel");
115 auto output = outputs.at("user_kernel").get_memory();
117 float answers[16] = { 1.5f, 2.5f, 5.5f, 4.f,
119 18.f,17.5f, 15.f, 22.f,
120 2.f, 6.f, 7.5f, 5.5f };
122 auto output_ptr = output.pointer<float>();
124 for (int i = 0; i < 16; i++)
126 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// Shared body for the integer variants: reorders f32 inputs to DType,
// runs an integer add kernel via custom_gpu_primitive, then reorders the
// result back to f32 ("to_float") and checks it.
// Supported DType values: data_types::i32, data_types::i64 (anything else
// throws std::runtime_error -- see below).
130 template<data_types DType>
131 void add_basic_in2x2x2x2_with_reorder()
// Presumably first input values:
138 // f0: b0: 1 2 b1: 0 0
139 // f0: b0: 3 4 b1: 0.5 -0.5
140 // f1: b0: 5 6 b1: 1.5 5.2
141 // f1: b0: 7 8 b1: 12 8
// Presumably second input values:
144 // f0: b0: 0.5 5 b1: 2.5 7
145 // f0: b0: 15 -2 b1: 17 6.5
146 // f1: b0: 0.5 2 b1: 2.5 4
147 // f1: b0: 8 -0.5 b1: 10 -2.5
// Reference sums in f32 (before integer truncation by the reorders):
150 // f0: b0: 1.5 7 b1: 2.5 7
151 // f0: b0: 18 2 b1: 17.5 6
152 // f1: b0: 5.5 8 b1: 4 9.2
153 // f1: b0: 15 16.5 b1: 22 16.5
156 const auto& engine = get_test_engine();
158 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
159 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
// Pick the OpenCL C scalar type name matching DType.
// NOTE(review): the `switch (DType) {` header and `break;` lines were
// sampled out of this chunk -- comments only, code untouched.
161 std::string data_type_string = "float";
164 case data_types::i32:
165 data_type_string = "int";
167 case data_types::i64:
168 data_type_string = "long";
171 throw std::runtime_error("Test does not support this data format!");
// Kernel source assembled at runtime so the pointer type matches DType.
174 std::string kernel_code =
175 "__kernel void add_kernel(const __global " + data_type_string + "* input0, const __global " + data_type_string + "* input1, __global " + data_type_string + "* output)\n" +
177 " const unsigned idx = get_global_id(0);\n" +
178 " output[idx] = input0[idx] + input1[idx];\n" +
// Launch description: two inputs, one output, 1-D global work size.
180 std::string entry_point = "add_kernel";
181 std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_input, 1 },{ arg_output, 0 } };
182 layout output_layout = { DType, format::yxfb,{ 2, 2, 2, 2 } };
183 std::vector<size_t> gws = { output_layout.count() };
// Topology: f32 inputs -> reorder to DType -> user kernel -> reorder back
// to f32 so the checks below can read floats.
185 topology.add(input_layout("input", input.get_layout()));
186 topology.add(input_layout("input2", input2.get_layout()));
187 topology.add(reorder("to_int1", "input", { DType, format::yxfb,{ 2,2,2,2 } }));
188 topology.add(reorder("to_int2", "input2", { DType, format::yxfb,{ 2,2,2,2 } }));
189 topology.add(custom_gpu_primitive(
191 { "to_int1", "to_int2" },
198 topology.add(reorder("to_float", "user_kernel", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
// Visible fragment of the set_values(...) population (opener sampled out).
210 15.f, 17.f, 8.f, 10.f,
211 -2.f, 6.f, 0.f, -2.f });
213 network network(engine, topology);
215 network.set_input_data("input", input);
216 network.set_input_data("input2", input2);
217 auto outputs = network.execute();
// Single output named "to_float"; expected values are the integer-
// truncated sums (e.g. 1.5 -> 1).
219 ASSERT_EQ(outputs.size(), size_t(1));
220 EXPECT_EQ(outputs.begin()->first, "to_float");
222 auto output = outputs.at("to_float").get_memory();
224 float answers[16] = { 1.f, 2.f, 5.f, 3.f,
226 18.f,17.f, 15.f, 22.f,
227 2.f, 6.f, 8.f, 6.f };
229 auto output_ptr = output.pointer<float>();
231 for (int i = 0; i < 16; i++)
233 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
// int32 instantiation of the shared reorder + add-kernel test body.
237 TEST(custom_gpu_primitive_int32, add_basic_in2x2x2x2) {
238 add_basic_in2x2x2x2_with_reorder<data_types::i32>();
// int64 instantiation of the shared reorder + add-kernel test body.
241 TEST(custom_gpu_primitive_int64, add_basic_in2x2x2x2) {
242 add_basic_in2x2x2x2_with_reorder<data_types::i64>();
245 TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) {
// Chains a built-in eltwise sum BEFORE the custom kernel: the user kernel
// takes a single input (the eltwise result) and adds the constant 1.
// NOTE(review): chunk is line-sampled (embedded original numbers; string
// quotes / set_values openers / braces missing) -- comments only.
// Presumably first input values:
251 // f0: b0: 1 2 b1: 0 0
252 // f0: b0: 3 4 b1: 0.5 -0.5
253 // f1: b0: 5 6 b1: 1.5 5.2
254 // f1: b0: 7 8 b1: 12 8
// Presumably second input values:
257 // f0: b0: 0.5 5 b1: 2.5 7
258 // f0: b0: 15 -2 b1: 17 6.5
259 // f1: b0: 0.5 2 b1: 2.5 4
260 // f1: b0: 8 -0.5 b1: 10 -2.5
// Eltwise sum of the two inputs (the kernel then adds 1 on top):
263 // f0: b0: 1.5 7 b1: 2.5 7
264 // f0: b0: 18 2 b1: 17.5 6
265 // f1: b0: 5.5 8 b1: 4 9.2
266 // f1: b0: 15 16.5 b1: 22 16.5
269 const auto& engine = get_test_engine();
271 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
272 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
// Single-input kernel: output[idx] = input0[idx] + 1.
274 std::string kernel_code =
276 __kernel void add_kernel(const __global float* input0, __global float* output)
278 const unsigned idx = get_global_id(0);
279 output[idx] = input0[idx] + 1;
// Launch description: one input (the eltwise output), one output.
282 std::string entry_point = "add_kernel";
283 std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
284 layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
285 std::vector<size_t> gws = { output_layout.count() };
// Topology: inputs -> eltwise sum -> custom kernel ("user_kernel").
287 topology.add(input_layout("input", input.get_layout()));
288 topology.add(input_layout("input2", input2.get_layout()));
289 topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
290 topology.add(custom_gpu_primitive(
// Visible fragments of the set_values(...) input population.
303 3.f, 0.5f, 7.f, 12.f,
308 0.5f, 2.5f, 0.5f, 2.5f,
310 15.f, 17.f, 8.f, 10.f,
311 -2.f, 6.5f, -0.5f, -2.5f });
313 network network(engine, topology);
315 network.set_input_data("input", input);
316 network.set_input_data("input2", input2);
317 auto outputs = network.execute();
// Expected: (input + input2) + 1 for each element, e.g. 1.5 + 1 = 2.5.
319 EXPECT_EQ(outputs.size(), size_t(1));
320 EXPECT_EQ(outputs.begin()->first, "user_kernel");
322 auto output = outputs.at("user_kernel").get_memory();
325 { 2.5f, 3.5f, 6.5f, 5.f,
326 8.f, 8.f, 9.f, 10.2f,
327 19.f, 18.5f, 16.f, 23.f,
328 3.f, 7.f, 8.5f, 6.5f };
330 auto output_ptr = output.pointer<float>();
332 for (int i = 0; i < 16; i++)
334 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
338 TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) {
// Mirror of the previous test: the custom kernel runs FIRST (adding a
// compile-time SCALAR injected via build options), then a built-in
// eltwise sums the kernel output with input2. Final network output is
// the "eltwise" primitive.
// NOTE(review): chunk is line-sampled -- comments only, code untouched.
// Presumably first input values:
344 // f0: b0: 1 2 b1: 0 0
345 // f0: b0: 3 4 b1: 0.5 -0.5
346 // f1: b0: 5 6 b1: 1.5 5.2
347 // f1: b0: 7 8 b1: 12 8
// Presumably second input values:
350 // f0: b0: 0.5 5 b1: 2.5 7
351 // f0: b0: 15 -2 b1: 17 6.5
352 // f1: b0: 0.5 2 b1: 2.5 4
353 // f1: b0: 8 -0.5 b1: 10 -2.5
// Sum of the two inputs (SCALAR=1 is then folded into the final answers):
356 // f0: b0: 1.5 7 b1: 2.5 7
357 // f0: b0: 18 2 b1: 17.5 6
358 // f1: b0: 5.5 8 b1: 4 9.2
359 // f1: b0: 15 16.5 b1: 22 16.5
362 const auto& engine = get_test_engine();
364 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
365 auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
// SCALAR is NOT defined in the source -- it comes from the "-DSCALAR=1"
// compiler option passed to custom_gpu_primitive below, exercising the
// build-options plumbing.
367 std::string kernel_code =
369 __kernel void add_kernel(const __global float* input0, __global float* output)
371 const unsigned idx = get_global_id(0);
372 output[idx] = input0[idx] + SCALAR;
375 std::string entry_point = "add_kernel";
376 std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
377 layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
378 std::vector<size_t> gws = { output_layout.count() };
// Topology: input -> custom kernel (+SCALAR) -> eltwise(+input2).
380 topology.add(input_layout("input", input.get_layout()));
381 topology.add(input_layout("input2", input2.get_layout()));
382 topology.add(custom_gpu_primitive(
388 "-cl-mad-enable -DSCALAR=1",
391 topology.add(eltwise("eltwise", {"user_kernel", "input2"}, eltwise_mode::sum));
// Visible fragments of the set_values(...) input population.
396 3.f, 0.5f, 7.f, 12.f,
401 0.5f, 2.5f, 0.5f, 2.5f,
403 15.f, 17.f, 8.f, 10.f,
404 -2.f, 6.5f, -0.5f, -2.5f });
406 network network(engine, topology);
408 network.set_input_data("input", input);
409 network.set_input_data("input2", input2);
410 auto outputs = network.execute();
// Expected: (input + 1) + input2 -- same totals as the eltwise-first
// variant above, but produced by the "eltwise" node this time.
412 EXPECT_EQ(outputs.size(), size_t(1));
413 EXPECT_EQ(outputs.begin()->first, "eltwise");
415 auto output = outputs.at("eltwise").get_memory();
418 { 2.5f, 3.5f, 6.5f, 5.f,
419 8.f, 8.f, 9.f, 10.2f,
420 19.f, 18.5f, 16.f, 23.f,
421 3.f, 7.f, 8.5f, 6.5f };
423 auto output_ptr = output.pointer<float>();
425 for (int i = 0; i < 16; i++)
427 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
431 TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2) {
// Regression test: two DIFFERENT custom kernels sharing the same entry
// point name ("add_kernel") but compiled with different -DSCALAR values
// must not collide in the program cache. kernel1 adds SCALAR (=1),
// kernel2 adds 2*SCALAR (=3, i.e. +6), so the chain adds 7 overall.
// NOTE(review): chunk is line-sampled -- comments only, code untouched.
// Stale value-table comments inherited from the sibling tests (this test
// checks input + 7 directly against the input buffer instead):
437 // f0: b0: 1 2 b1: 0 0
438 // f0: b0: 3 4 b1: 0.5 -0.5
439 // f1: b0: 5 6 b1: 1.5 5.2
440 // f1: b0: 7 8 b1: 12 8
443 // f0: b0: 0.5 5 b1: 2.5 7
444 // f0: b0: 15 -2 b1: 17 6.5
445 // f1: b0: 0.5 2 b1: 2.5 4
446 // f1: b0: 8 -0.5 b1: 10 -2.5
449 // f0: b0: 1.5 7 b1: 2.5 7
450 // f0: b0: 18 2 b1: 17.5 6
451 // f1: b0: 5.5 8 b1: 4 9.2
452 // f1: b0: 15 16.5 b1: 22 16.5
455 const auto& engine = get_test_engine();
457 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
// First kernel: +SCALAR (SCALAR comes from this primitive's build opts).
459 std::string kernel_code1 =
461 __kernel void add_kernel(const __global float* input0, __global float* output)
463 const unsigned idx = get_global_id(0);
464 output[idx] = input0[idx] + SCALAR;
// Second kernel: same entry point name, but +2*SCALAR.
468 std::string kernel_code2 =
470 __kernel void add_kernel(const __global float* input0, __global float* output)
472 const unsigned idx = get_global_id(0);
473 output[idx] = input0[idx] + 2*SCALAR;
476 std::string entry_point = "add_kernel";
477 std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_output, 0 } };
478 layout output_layout = { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } };
479 std::vector<size_t> gws = { output_layout.count() };
// Topology: input -> kernel1 (-DSCALAR=1) -> kernel2 (-DSCALAR=3).
481 topology.add(input_layout("input", input.get_layout()));
482 topology.add(custom_gpu_primitive(
488 "-cl-mad-enable -DSCALAR=1",
491 topology.add(custom_gpu_primitive(
497 "-cl-mad-enable -DSCALAR=3",
// Visible fragment of the set_values(...) input population.
504 3.f, 0.5f, 7.f, 12.f,
508 network network(engine, topology);
510 network.set_input_data("input", input);
511 auto outputs = network.execute();
// Verify: single output from "user_kernel2", and each element equals the
// raw input plus 7 (= 1 from kernel1 + 2*3 from kernel2).
513 EXPECT_EQ(outputs.size(), size_t(1));
514 EXPECT_EQ(outputs.begin()->first, "user_kernel2");
516 auto output = outputs.at("user_kernel2").get_memory();
518 auto output_ptr = output.pointer<float>();
519 auto input_ptr = input.pointer<float>();
521 for (int i = 0; i < 16; i++)
523 EXPECT_TRUE(are_equal(input_ptr[i] + 7, output_ptr[i]));
527 TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) {
528 const auto& engine = get_test_engine();
530 auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
531 auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
533 std::string kernel_code =
535 __kernel void add_kernel(const __global uchar* input0, const __global uchar* input1, __global uchar* output)
537 const unsigned idx = get_global_id(0);
538 output[idx] = input0[idx] + input1[idx];
541 std::string entry_point = "add_kernel";
542 std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_input, 1 },{ arg_output, 0 } };
543 layout output_layout = { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } };
544 std::vector<size_t> gws = { output_layout.count() };
546 topology.add(input_layout("input", input.get_layout()));
547 topology.add(input_layout("input2", input2.get_layout()));
548 topology.add(custom_gpu_primitive(
550 { "input", "input2" },
558 set_values<unsigned char>(input, {
565 set_values<unsigned char>(input2, {
572 network network(engine, topology);
574 network.set_input_data("input", input);
575 network.set_input_data("input2", input2);
576 auto outputs = network.execute();
578 EXPECT_EQ(outputs.size(), size_t(1));
579 EXPECT_EQ(outputs.begin()->first, "user_kernel");
581 auto output = outputs.at("user_kernel").get_memory();
583 unsigned char answers[16] = {
590 auto output_ptr = output.pointer<unsigned char>();
592 for (int i = 0; i < 16; i++)
594 EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));