// Copyright (c) 2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/fully_connected.hpp"
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/tensor.hpp>
24 #include <api/CPP/network.hpp>
25 #include <api/CPP/engine.hpp>
26 #include "test_utils/test_utils.h"
27 #include <api/CPP/data.hpp>
28 #include "instrumentation.h"
34 template<> struct type_to_data_type<FLOAT16> { static const data_types value = data_types::f16; };
37 using namespace cldnn;
38 using namespace tests;
40 cldnn::format::type layout_4d(cldnn::format f) {
42 case cldnn::format::bfyx:
43 return cldnn::format::bfyx;
44 case cldnn::format::yxfb:
45 return cldnn::format::yxfb;
52 VVVVF<T> fully_connected_reference(VVVVF<T> &input, VVVVF<T> &weights, VF<T> &bias, bool relu = false, T slope = 0.0f) {
53 size_t input_f = input[0].size();
54 size_t input_y = input[0][0].size();
55 size_t input_x = input[0][0][0].size();
56 size_t output_b = input.size(); // input is assumed to be bfyx
57 size_t output_f = weights.size(); // weights is assumed to be bfyx
58 VVVVF<T> output(output_b, VVVF<T>(1, VVF<T>(1, VF<T>(output_f))));
60 for (size_t b = 0; b < output_b; ++b) {
61 for (size_t n = 0; n < output_f; ++n) {
63 for (size_t f = 0; f < input_f; ++f) {
64 for (size_t y = 0; y < input_y; ++y) {
65 for (size_t x = 0; x < input_x; ++x) {
66 res += (float)input[b][f][y][x] * (float)weights[n][f][y][x];
70 if (relu && res < (float)0)
72 output[b][0][0][n] = (T)res;
79 void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format test_weights_fmt, int input_b, int f, int y, int x, int output_f, bool relu, T slope = 0) {
80 int min_random = -2, max_random = 2;
81 VVVVF<T> input_rnd = generate_random_4d<T>(input_b, f, y, x, min_random, max_random);
82 VVVVF<T> weights_rnd = generate_random_4d<T>(output_f, f, y, x, min_random, max_random);
83 VF<T> bias_rnd_vec = generate_random_1d<T>(output_f, min_random, max_random);
84 VF<T> input_rnd_vec = flatten_4d<T>(test_input_fmt, input_rnd);
85 VF<T> weights_rnd_vec = flatten_4d<T>(test_weights_fmt, weights_rnd);
87 const auto& engine = get_test_engine();
88 tensor input_tensor(input_b, f, x, y);
89 tensor weights_tensor(output_f, f, x, y);
90 auto input = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
91 auto weights = memory::allocate(engine, { type_to_data_type<T>::value, test_weights_fmt, weights_tensor });
92 auto bias = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1,1,output_f,1 } });
93 set_values(input, input_rnd_vec);
94 set_values(weights, weights_rnd_vec);
95 set_values(bias, bias_rnd_vec);
98 input_layout("input", input.get_layout()),
99 data("weights", weights),
101 fully_connected("fully_connected", "input", "weights", "bias", relu, slope)
104 network network(engine, topology);
105 network.set_input_data("input", input);
107 auto outputs = network.execute();
108 EXPECT_EQ(outputs.size(), size_t(1));
109 EXPECT_EQ(outputs.begin()->first, "fully_connected");
111 auto output_memory = outputs.at("fully_connected").get_memory();
112 auto output_layout = output_memory.get_layout();
113 auto output_ptr = output_memory.pointer<T>();
115 //EXPECT_EQ(output_layout.format.value, test_input_fmt);
116 tensor output_tensor = output_layout.size;
117 int b_size = output_tensor.batch[0];
118 int x_size = output_tensor.feature[0];
119 EXPECT_EQ(b_size, input_b);
120 EXPECT_EQ(x_size, output_f);
121 unsigned num_of_operations = f * x * y * 2;
122 float ulp = (1.0f / 1024.0f) * num_of_operations;
123 bool test_is_correct = true;
124 VVVVF<T> output_cpu = fully_connected_reference<T>(input_rnd, weights_rnd, bias_rnd_vec, relu, slope);
125 VF<T> output_cpu_vec = flatten_4d<T>(layout_4d(output_layout.format), output_cpu);
126 for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
127 if (std::abs(float(output_cpu_vec[i]) - float(output_ptr[i])) > ulp) {
128 EXPECT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]); // to print the problematic values
129 test_is_correct = false;
134 EXPECT_EQ(test_is_correct, true) << std::endl
135 << "failing test parameters:" << std::endl
136 << "test_input_fmt = " << format::traits(test_input_fmt).order << std::endl
137 << "test_weights_fmt = " << format::traits(test_weights_fmt).order << std::endl
138 << "input_b = " << input_b << std::endl
139 << "f = " << f << std::endl
140 << "y = " << y << std::endl
141 << "x = " << x << std::endl
142 << "output_f = " << output_f << std::endl
143 << "relu = " << relu << std::endl
144 << "slope = " << (float)slope << std::endl
145 << "type = " << (sizeof(T) == 2 ? "float16" : "float32") << std::endl;
148 TEST(DISABLED_fully_connected_gpu, generic_random_short) {
149 VF<cldnn::format> test_input_fmts = { cldnn::format::bfyx, cldnn::format::yxfb };
150 VF<cldnn::format> test_weights_fmts = { cldnn::format::yxfb };
151 VF<bool> relu = { true, false };
152 std::vector<int> batches = { 1, 2, 4, 8, 16 };
153 std::vector<int> features = { 1, 2 };
154 std::vector<std::pair<int, int>> input_sizes = { {28, 28}, {64, 64}, {100, 100}, {227, 227}, {1000, 1}, {1, 4096} };
155 VF<int> outputs_x = { 5, 16 };
157 const auto& engine = get_test_engine();
158 bool f16_supported = !!engine.get_info().supports_fp16;
159 if (!f16_supported) {
160 std::cout << "[ SKIPPED ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
163 for (cldnn::format test_input_fmt : test_input_fmts) {
164 for (cldnn::format test_weights_fmt : test_weights_fmts) {
165 for (const auto& b : batches) {
166 for(const auto& f : features) {
167 for (const auto& sizes : input_sizes) {
168 for (int output_f : outputs_x) {
169 for (bool relu_activated : relu) {
170 generic_fully_connected_test<float>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_f, relu_activated);
171 if (!f16_supported) continue;
172 generic_fully_connected_test<FLOAT16>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_f, relu_activated);
182 TEST(fully_connected_gpu, no_biases) {
203 const int32_t input_x = 3, input_b = 1, // size of whole input buffer
204 weight_b = 4, weight_x = 3; // size of whole weights buffer
206 const auto& engine = get_test_engine();
208 auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1} });
209 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
211 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
212 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
214 auto input = input_layout("input", input_prim.get_layout());
215 auto w_data = data("weights", weights_prim);
216 auto fc = fully_connected("full_con_prim", "input", "weights");
219 topology.add(w_data);
222 network network(engine, topology);
223 network.set_input_data("input", input_prim);
225 auto outputs = network.execute();
226 EXPECT_EQ(outputs.size(), size_t(1));
227 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
229 auto output_prim = outputs.begin()->second.get_memory();
231 auto output_ptr = output_prim.pointer<float>();
233 EXPECT_EQ(1.5f, output_ptr[0]);
234 EXPECT_EQ(0.75f, output_ptr[1]);
235 EXPECT_EQ(-2.25f, output_ptr[2]);
236 EXPECT_EQ(3.0f, output_ptr[3]);
240 TEST(fully_connected_gpu, no_biases_int8) {
261 const int32_t input_x = 3, input_b = 1, // size of whole input buffer
262 weight_b = 4, weight_x = 3; // size of whole weights buffer
264 const auto& engine = get_test_engine();
266 auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ input_b, 1, input_x, 1 } });
267 auto weights_prim = memory::allocate(engine, { data_types::i8,format::bfyx,{ weight_b, 1, weight_x, 1 } });
269 set_values(input_prim, { 8.4f, 2.3f, -4.99f });
270 set_values<char>(weights_prim, { 2, 1, 0, -3, -2, 1, 0, -2, -4, -5, 10, 8 });
272 auto input = input_layout("input", input_prim.get_layout());
273 auto w_data = data("weights", weights_prim);
274 auto ri = reorder("reorder_to_int", "input", { data_types::i8,format::bfyx,{ input_b, 1, input_x, 1 } });
275 auto fc = fully_connected("full_con_prim", "reorder_to_int", "weights");
276 auto rf = reorder("reorder_to_float", "full_con_prim", { data_types::f32,format::bfyx,{ input_b, 1, 4, 1 } });
279 topology.add(w_data);
283 network network(engine, topology);
284 network.set_input_data("input", input_prim);
286 auto outputs = network.execute();
287 EXPECT_EQ(outputs.size(), size_t(1));
288 EXPECT_EQ(outputs.begin()->first, "reorder_to_float");
290 auto output_prim = outputs.begin()->second.get_memory();
292 auto output_ptr = output_prim.pointer<float>();
294 EXPECT_EQ(18.0f, output_ptr[0]);
295 EXPECT_EQ(-32.0f, output_ptr[1]);
296 EXPECT_EQ(12.0f, output_ptr[2]);
297 EXPECT_EQ(-52.0f, output_ptr[3]);
301 TEST(fully_connected_gpu, xb_f32_batch_1) {
317 // 1.0, 2.0, 3.0, 4.0
322 const int32_t output_f = 4, // size of whole output buffer
323 input_x = 3, input_b = 1, // size of whole input buffer
324 weight_b = 4, weight_x = 3; // size of whole weights buffer
326 const auto& engine = get_test_engine();
328 auto input_prim = memory::allocate( engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } });
329 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
330 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,output_f, 1} });
332 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
333 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
334 set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f });
337 input_layout("input", input_prim.get_layout()),
338 data("weights", weights_prim),
339 data("bias", bias_prim),
340 fully_connected("full_con_prim", "input", "weights", "bias")
343 network network(engine, topology);
344 network.set_input_data("input", input_prim);
346 auto outputs = network.execute();
347 EXPECT_EQ(outputs.size(), size_t(1));
348 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
350 auto output_prim = outputs.begin()->second.get_memory();
352 auto output_ptr = output_prim.pointer<float>();
354 EXPECT_EQ(2.5f, output_ptr[0]);
355 EXPECT_EQ(2.75f, output_ptr[1]);
356 EXPECT_EQ(0.75f, output_ptr[2]);
357 EXPECT_EQ(7.0f, output_ptr[3]);
360 TEST(fully_connected_gpu, xb_f32_batch_2) {
376 // 1.0, 2.0, 3.0, 4.0
382 const int32_t output_f = 4, // size of whole output buffer
383 input_x = 3, input_b = 2, // size of whole input buffer
384 weight_b = 4, weight_x = 3; // size of whole weights buffer
386 const auto& engine = get_test_engine();
388 auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b,1,input_x, 1 } });
389 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
390 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
392 set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f });
393 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
394 set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f });
397 input_layout("input", input_prim.get_layout()),
398 data("weights", weights_prim),
399 data("bias", bias_prim),
400 fully_connected("full_con_prim", "input", "weights", "bias")
403 network network(engine, topology);
404 network.set_input_data("input", input_prim);
406 auto outputs = network.execute();
407 EXPECT_EQ(outputs.size(), size_t(1));
408 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
410 auto output_prim = outputs.begin()->second.get_memory();
412 auto output_ptr = output_prim.pointer<float>();
414 EXPECT_EQ(2.50f, output_ptr[0]);
415 EXPECT_EQ(4.00f, output_ptr[1]);
416 EXPECT_EQ(2.75f, output_ptr[2]);
417 EXPECT_EQ(1.00f, output_ptr[3]);
418 EXPECT_EQ(0.75f, output_ptr[4]);
419 EXPECT_EQ(2.75f, output_ptr[5]);
420 EXPECT_EQ(7.00f, output_ptr[6]);
421 EXPECT_EQ(5.00f, output_ptr[7]);
424 TEST(fully_connected_gpu, x_f32) {
439 // 1.0, 2.0, 3.0, 4.0
443 const int32_t output_f = 4, // size of whole output buffer
444 input_x = 3, // size of whole input buffer
445 weight_b = 4, weight_x = 3; // size of whole weights buffer
447 const auto& engine = get_test_engine();
449 auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,input_x,1 } });
450 //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
451 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
452 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
454 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
455 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
456 set_values(bias_prim, { 1.0f, 2.0f, 3.0f, 4.0f });
459 input_layout("input", input_prim.get_layout()),
460 data("weights", weights_prim),
461 data("bias", bias_prim),
462 fully_connected("full_con_prim", "input", "weights", "bias")
465 network network(engine, topology);
466 network.set_input_data("input", input_prim);
468 auto outputs = network.execute();
469 EXPECT_EQ(outputs.size(), size_t(1));
470 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
472 auto output_prim = outputs.begin()->second.get_memory();
474 auto output_ptr = output_prim.pointer<float>();
476 EXPECT_EQ(2.50f, output_ptr[0]);
477 EXPECT_EQ(2.75f, output_ptr[1]);
478 EXPECT_EQ(0.75f, output_ptr[2]);
479 EXPECT_EQ(7.00f, output_ptr[3]);
483 TEST(fully_connected_gpu, yxfn_f32) {
484 // Input : 1x2x1x2 - 1 batch 2 feature maps of size 2x1
485 // Output : 2x1 - 2 batches 1 neuron each
486 // Weights: 2x2x1x2 - 2 neurons with weights of 2 feature maps of size 2x1
504 const auto& engine = get_test_engine();
506 auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 2, 1 } });
507 //auto output_prim = memory::allocate({ memory::format::xb_f32,{ 2 ,{ { 1 } }, 1 } });
508 auto weights_prim = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
509 auto bias_prim = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 1 } });
511 set_values(input_prim, { 1.f, 3.f, -2.f, -4.f });
512 set_values(weights_prim, { 1.f, -1.f, 2.0f, 0.f, 3.0f, 4.0f, 0.5f, 5.0f });
513 set_values(bias_prim, { 1.0f, -5.0f });
516 input_layout("input", input_prim.get_layout()),
517 data("weights", weights_prim),
518 data("bias", bias_prim),
519 fully_connected("full_con_prim", "input", "weights", "bias")
522 network network(engine, topology);
523 network.set_input_data("input", input_prim);
525 auto outputs = network.execute();
526 EXPECT_EQ(outputs.size(), size_t(1));
527 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
529 auto output_prim = outputs.begin()->second.get_memory();
531 auto output_ptr = output_prim.pointer<float>();
533 EXPECT_EQ(10, output_ptr[0]);
534 EXPECT_EQ(-28.5, output_ptr[1]);
537 TEST(fully_connected_gpu, xb_f32_batch_1_relu) {
553 // 1.0, -2.0, 3.0, -4.0
558 const int32_t output_f = 4, // size of whole output buffer
559 input_x = 3, input_b = 1, // size of whole input buffer
560 weight_b = 4, weight_x = 3; // size of whole weights buffer
562 const auto& engine = get_test_engine();
564 auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } });
565 //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
566 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
567 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f, 1 } });
569 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
570 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
571 set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f });
574 input_layout("input", input_prim.get_layout()),
575 data("weights", weights_prim),
576 data("bias", bias_prim),
577 fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
580 network network(engine, topology);
581 network.set_input_data("input", input_prim);
583 auto outputs = network.execute();
584 EXPECT_EQ(outputs.size(), size_t(1));
585 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
587 auto output_prim = outputs.begin()->second.get_memory();
589 auto output_ptr = output_prim.pointer<float>();
591 EXPECT_EQ(2.50f, output_ptr[0]);
592 EXPECT_EQ(0.00f, output_ptr[1]);
593 EXPECT_EQ(0.75f, output_ptr[2]);
594 EXPECT_EQ(0.00f, output_ptr[3]);
597 TEST(fully_connected_gpu, xb_f32_batch_2_relu) {
613 // 1.0, -2.0, 3.0, -4.0
619 const int32_t output_f = 4, // size of whole output buffer
620 input_x = 3, input_b = 2, // size of whole input buffer
621 weight_b = 4, weight_x = 3; // size of whole weights buffer
623 const auto& engine = get_test_engine();
625 auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } });
626 //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
627 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
628 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
630 set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f });
631 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
632 set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f });
635 input_layout("input", input_prim.get_layout()),
636 data("weights", weights_prim),
637 data("bias", bias_prim),
638 fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
641 network network(engine, topology);
642 network.set_input_data("input", input_prim);
644 auto outputs = network.execute();
645 EXPECT_EQ(outputs.size(), size_t(1));
646 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
648 auto output_prim = outputs.begin()->second.get_memory();
650 auto output_ptr = output_prim.pointer<float>();
652 EXPECT_EQ(2.50f, output_ptr[0]);
653 EXPECT_EQ(4.00f, output_ptr[1]);
654 EXPECT_EQ(0.00f, output_ptr[2]);
655 EXPECT_EQ(0.00f, output_ptr[3]);
656 EXPECT_EQ(0.75f, output_ptr[4]);
657 EXPECT_EQ(2.75f, output_ptr[5]);
658 EXPECT_EQ(0.00f, output_ptr[6]);
659 EXPECT_EQ(0.00f, output_ptr[7]);
662 TEST(fully_connected_gpu, x_f32_relu) {
677 // 1.0, -2.0, 3.0, -4.0
681 const int32_t output_f = 4, // size of whole output buffer
682 input_x = 3, // size of whole input buffer
683 weight_b = 4, weight_x = 3; // size of whole weights buffer
685 const auto& engine = get_test_engine();
687 auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } });
688 //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1 ,{ { output_f } }, 1 } });
689 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
690 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
692 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
693 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
694 set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f });
697 input_layout("input", input_prim.get_layout()),
698 data("weights", weights_prim),
699 data("bias", bias_prim),
700 fully_connected("full_con_prim", "input", "weights", "bias", true, 0)
703 network network(engine, topology);
704 network.set_input_data("input", input_prim);
706 auto outputs = network.execute();
707 EXPECT_EQ(outputs.size(), size_t(1));
708 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
710 auto output_prim = outputs.begin()->second.get_memory();
712 auto output_ptr = output_prim.pointer<float>();
714 EXPECT_EQ(2.50f, output_ptr[0]);
715 EXPECT_EQ(0.00f, output_ptr[1]);
716 EXPECT_EQ(0.75f, output_ptr[2]);
717 EXPECT_EQ(0.00f, output_ptr[3]);
720 TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) {
724 // Negative Slope: 0.1
736 // 1.0, -2.0, 3.0, -4.0
738 // 2.5 -0.125 0.75 -0.1
740 const int32_t output_f = 4, // size of whole output buffer
741 input_x = 3, // size of whole input buffer
742 weight_b = 4, weight_x = 3; // size of whole weights buffer
744 const auto& engine = get_test_engine();
746 auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } });
747 //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1 ,{ { output_f } }, 1 } });
748 auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
749 auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
751 set_values(input_prim, { -0.5f, 2.0f, 0.5f });
752 set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
753 set_values(bias_prim, { 1.0f, -2.0f, 3.0f, -4.0f });
756 input_layout("input", input_prim.get_layout()),
757 data("weights", weights_prim),
758 data("bias", bias_prim),
759 fully_connected("full_con_prim", "input", "weights", "bias", true, 0.1f)
762 network network(engine, topology);
763 network.set_input_data("input", input_prim);
765 auto outputs = network.execute();
766 EXPECT_EQ(outputs.size(), size_t(1));
767 EXPECT_EQ(outputs.begin()->first, "full_con_prim");
769 auto output_prim = outputs.begin()->second.get_memory();
771 auto output_ptr = output_prim.pointer<float>();
773 EXPECT_EQ(2.50f, output_ptr[0]);
774 EXPECT_EQ(-0.125f, output_ptr[1]);
775 EXPECT_EQ(0.75f, output_ptr[2]);
776 EXPECT_EQ(-0.1f, output_ptr[3]);
779 TEST(fully_connected_gpu, b_fs_yx_fsv4)
781 const auto& engine = get_test_engine();
784 const int in_F = 2048;
788 const int W_B = 1000;
789 const int W_F = in_F;
790 const int W_Y = in_Y;
791 const int W_X = in_X;
794 std::vector<char> Data(in_F * in_B); // in_X=in_Y=1
796 std::generate(Data.begin(), Data.end(), [i]() mutable { return i++ % 9; });
797 auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}});
798 set_values(input, std::move(Data));
801 topology topology(input_layout("input", input.get_layout()));
804 topology.add(reorder("reorder_in",
806 layout(data_types::i8, format::b_fs_yx_fsv4, {in_B, in_F, in_X, in_Y})));
809 std::vector<char> Weights(W_B * W_F);
811 std::generate(Weights.begin(), Weights.end(), [W_F, i]() mutable {
812 return i % 2 ? -(i++) / W_F - 1 : (i++) / W_F + 1;
815 memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
817 memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
818 set_values(weights_gold, Weights);
819 set_values(weights_imad, std::move(Weights));
820 topology.add(data("weights_gold", weights_gold), data("weights_imad", weights_imad));
822 // Bias, Callibraiton, Quantization
823 std::vector<float> vB(in_F), vC(in_F), vQ(in_F);
825 std::generate(vB.begin(), vB.end(), [x]() mutable {
832 std::generate(vC.begin(), vC.end(), [x]() mutable {
839 std::generate(vQ.begin(), vQ.end(), [x]() mutable {
845 auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
846 auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
847 auto callib_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
848 auto callib_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
849 auto quant_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
850 auto quant_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
851 set_values(bias_gold, vB);
852 set_values(bias_imad, std::move(vB));
853 set_values(callib_gold, vC);
854 set_values(callib_imad, std::move(vC));
855 set_values(quant_gold, vQ);
856 set_values(quant_imad, std::move(vQ));
857 topology.add(data("bias_gold", bias_gold),
858 data("callib_gold", callib_gold),
859 data("quant_gold", quant_gold));
860 topology.add(data("bias_imad", bias_imad),
861 data("callib_imad", callib_imad),
862 data("quant_imad", quant_imad));
865 fully_connected fullc_gold(
866 "fullc_gold", "input", "weights_gold", {"bias_gold"}, {"quant_gold"}, {"callib_gold"}, 1.0f);
867 fully_connected fullc_imad(
868 "fullc_imad", "reorder_in", "weights_imad", {"bias_imad"}, {"quant_imad"}, {"callib_imad"}, 1.0f);
869 topology.add(fullc_gold, fullc_imad);
873 reorder("reorder_gold", fullc_gold, layout(data_types::i8, format::bfyx, {in_B, W_B, 1, 1}));
875 reorder("reorder_imad", fullc_imad, layout(data_types::i8, format::bfyx, {in_B, W_B, 1, 1}));
876 topology.add(reorder_gold, reorder_imad);
879 build_options build_opt;
880 build_opt.set_option(build_option::optimize_data(true));
881 network network(engine, topology, build_opt);
884 network.set_input_data("input", input);
885 auto outputs = network.execute();
887 auto out_gold = outputs.find("reorder_gold");
888 auto out_test = outputs.find("reorder_imad");
890 ASSERT_NE(out_gold, outputs.end());
891 ASSERT_NE(out_test, outputs.end());
892 auto gold_ptr = out_gold->second.get_memory().pointer<char>();
893 auto test_ptr = out_test->second.get_memory().pointer<char>();
895 ASSERT_EQ(gold_ptr.size(), test_ptr.size());
896 for (size_t i = 0; i < gold_ptr.size(); i++)
898 ASSERT_EQ(gold_ptr[i], test_ptr[i]);