2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/CPP/memory.hpp"
20 #include <api/CPP/input_layout.hpp>
21 #include "api/CPP/batch_norm.hpp"
22 #include <api/CPP/topology.hpp>
23 #include <api/CPP/network.hpp>
24 #include <api/CPP/engine.hpp>
25 #include "test_utils/test_utils.h"
26 #include <api/CPP/reorder.hpp>
27 #include <api/CPP/data.hpp>
28 #include <api/CPP/mutable_data.hpp>
30 using namespace cldnn;
31 using namespace tests;
// NOTE(review): this listing appears garbled — each line carries a stray
// leading number from an external listing, and several original lines
// (topology declaration, input set_values opening, sum/var accumulation,
// closing braces) are missing between the visible lines. Restore from VCS.
// Test: batch_norm on yxfb f32 input with externally supplied mean/variance;
// verifies the normalized output has ~zero mean and ~unit variance per
// feature slice (tolerance 1e-3).
33 TEST(batch_normalization_gpu, basic_in2x3x2x2) {
39 // f0: b0: 1 2 -10 b1: 0 0 -11
40 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
41 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
42 // f1: b0: 7 8 -16 b1: 12 9 -17
53 const auto& engine = get_test_engine();
55 auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
56 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
57 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
59 float epsilon = 0.0001f;
62 topology.add(input_layout("input", input.get_layout()));
63 topology.add(data("mean", mean))_
64 topology.add(data("variance", variance));
65 topology.add(batch_norm("batch_norm", "input", "mean", "variance", epsilon));
70 -10.f, -11.f, -12.f, -13.f,
73 -14.f, -15.f, -16.f, -17.f
76 set_values(mean, { -3.3333f, -0.3583f });
77 set_values(variance, { 44.9305f, 107.0624f });
79 network network(engine, topology);
81 network.set_input_data("input", input);
83 auto outputs = network.execute();
85 auto output = outputs.at("batch_norm").get_memory();
86 auto output_ptr = output.pointer<float>();
// yxfb addressing: index = b + B*f + B*F*x + B*F*X*y (B=2, F=2, X=3, Y=2).
88 for (int j = 0; j < 2; ++j) { //F
89 float sum = 0, var = 0;
90 for (int i = 0; i < 2; ++i) { //B
91 for (int k = 0; k < 2; ++k) { //Y
92 for (int l = 0; l < 3; ++l) { //X
93 float data = output_ptr[i + 2*j + 2*2*l + 2*2*3*k];
// (accumulation of sum/var over the 12 elements is in the elided lines)
102 EXPECT_NEAR(sum, 0, 1e-03F);
103 EXPECT_NEAR(var, 1, 1e-03F);
// Test: batch_norm with precomputed mean/variance plus scale/shift inputs.
// The checker un-applies scale/shift per feature, then verifies ~zero mean /
// ~unit variance of the underlying normalized values.
107 TEST(batch_normalization_gpu, basic_in2x3x2x2_scale_shift) {
113 // f0: b0: 1 2 -10 b1: 0 0 -11
114 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
115 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
116 // f1: b0: 7 8 -16 b1: 12 9 -17
136 const auto& engine = get_test_engine();
138 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
139 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
140 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
141 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
142 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
144 float epsilon = 0.0001f;
147 topology.add(input_layout("input", input.get_layout()));
148 topology.add(data("mean", mean));
149 topology.add(data("variance", variance));
150 topology.add(data("scale", scale));
151 topology.add(data("shift", shift));
152 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
157 -10.f, -11.f, -12.f, -13.f,
158 3.f, 0.5f, 7.f, 12.f,
159 4.f, -0.5f, 8.f, 9.f,
160 -14.f, -15.f, -16.f, -17.f
163 set_values(mean, { -3.3333f, -0.3583f });
164 set_values(variance, { 44.9305f, 107.0624f });
165 set_values(scale, { 2.f, 1.f });
166 set_values(shift, { 0.f, 5.f });
168 network network(engine, topology);
170 network.set_input_data("input", input);
172 auto outputs = network.execute();
174 auto output = outputs.at("batch_norm").get_memory();
175 auto output_ptr = output.pointer<float>();
177 for (int j = 0; j < 2; ++j) { //F
178 float sum = 0, var = 0;
// per-feature scale/shift used to invert the affine part of the transform
180 auto scalep = scale.pointer<float>();
181 auto shiftp = shift.pointer<float>();
182 float scalef = scalep[j];
183 float shiftf = shiftp[j];
185 for (int i = 0; i < 2; ++i) { //B
186 for (int k = 0; k < 2; ++k) { //Y
187 for (int l = 0; l < 3; ++l) { //X
188 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
189 data = (data - shiftf) / scalef;
198 EXPECT_NEAR(sum, 0, 1e-03F);
199 EXPECT_NEAR(var, 1, 1e-03F);
// Test: batch_norm computing mean/variance internally (no mean/variance
// inputs); "inv_variance" is provided as a mutable_data side output.
// Verifies the normalized output has ~zero mean / ~unit variance per feature.
203 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc) {
209 // f0: b0: 1 2 -10 b1: 0 0 -11
210 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
211 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
212 // f1: b0: 7 8 -16 b1: 12 9 -17
223 const auto& engine = get_test_engine();
225 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
226 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
228 float epsilon = 0.0001f;
231 topology.add(input_layout("input", input.get_layout()));
232 topology.add(mutable_data("inv_variance", inv_variance));
233 topology.add(batch_norm("batch_norm", "input", epsilon, "inv_variance"));
238 -10.f, -11.f, -12.f, -13.f,
239 3.f, 0.5f, 7.f, 12.f,
240 4.f, -0.5f, 8.f, 9.f,
241 -14.f, -15.f, -16.f, -17.f
244 network network(engine, topology);
246 network.set_input_data("input", input);
248 auto outputs = network.execute();
250 auto output = outputs.at("batch_norm").get_memory();
251 auto output_ptr = output.pointer<float>();
253 for (int j = 0; j < 2; ++j) { //F
254 float sum = 0, var = 0;
255 for (int i = 0; i < 2; ++i) { //B
256 for (int k = 0; k < 2; ++k) { //Y
257 for (int l = 0; l < 3; ++l) { //X
258 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
267 EXPECT_NEAR(sum, 0, 1e-03F);
268 EXPECT_NEAR(var, 1, 1e-03F);
// Test: batch_norm computing mean/variance internally with NO side outputs
// at all (epsilon-only constructor). Same zero-mean / unit-variance check.
272 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_no_inv_var) {
278 // f0: b0: 1 2 -10 b1: 0 0 -11
279 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
280 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
281 // f1: b0: 7 8 -16 b1: 12 9 -17
292 const auto& engine = get_test_engine();
294 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
296 float epsilon = 0.0001f;
299 topology.add(input_layout("input", input.get_layout()));
300 topology.add(batch_norm("batch_norm", "input", epsilon));
305 -10.f, -11.f, -12.f, -13.f,
306 3.f, 0.5f, 7.f, 12.f,
307 4.f, -0.5f, 8.f, 9.f,
308 -14.f, -15.f, -16.f, -17.f
311 network network(engine, topology);
313 network.set_input_data("input", input);
315 auto outputs = network.execute();
317 auto output = outputs.at("batch_norm").get_memory();
318 auto output_ptr = output.pointer<float>();
320 for (int j = 0; j < 2; ++j) { //F
321 float sum = 0, var = 0;
322 for (int i = 0; i < 2; ++i) { //B
323 for (int k = 0; k < 2; ++k) { //Y
324 for (int l = 0; l < 3; ++l) { //X
325 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
334 EXPECT_NEAR(sum, 0, 1e-03F);
335 EXPECT_NEAR(var, 1, 1e-03F);
// Test: internally computed mean/variance + scale/shift inputs + inv_variance
// side output. Checker un-applies scale/shift, then verifies ~N(0,1) per
// feature.
339 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift) {
345 // f0: b0: 1 2 -10 b1: 0 0 -11
346 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
347 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
348 // f1: b0: 7 8 -16 b1: 12 9 -17
367 const auto& engine = get_test_engine();
369 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
370 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
371 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
372 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
373 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
375 float epsilon = 0.0001f;
378 topology.add(input_layout("input", input.get_layout()));
379 topology.add(data("scale", scale));
380 topology.add(data("shift", shift));
381 topology.add(mutable_data("inv_variance", inv_variance));
382 topology.add(batch_norm("batch_norm", "input", epsilon, "scale", "shift", "inv_variance"));
387 -10.f, -11.f, -12.f, -13.f,
388 3.f, 0.5f, 7.f, 12.f,
389 4.f, -0.5f, 8.f, 9.f,
390 -14.f, -15.f, -16.f, -17.f
393 set_values(scale, { 2.f, 1.f });
394 set_values(shift, { 0.f, 5.f });
396 network network(engine, topology);
398 network.set_input_data("input", input);
400 auto outputs = network.execute();
402 auto output = outputs.at("batch_norm").get_memory();
403 auto output_ptr = output.pointer<float>();
405 for (int j = 0; j < 2; ++j) { //F
406 float sum = 0, var = 0;
408 auto scalep = scale.pointer<float>();
409 auto shiftp = shift.pointer<float>();
410 float scalef = scalep[j];
411 float shiftf = shiftp[j];
413 for (int i = 0; i < 2; ++i) { //B
414 for (int k = 0; k < 2; ++k) { //Y
415 for (int l = 0; l < 3; ++l) { //X
416 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
417 data = (data - shiftf) / scalef;
426 EXPECT_NEAR(sum, 0, 1e-03F);
427 EXPECT_NEAR(var, 1, 1e-03F);
// Test: internally computed mean/variance + scale/shift, without the
// inv_variance side output. Same un-apply-then-check-N(0,1) verification.
431 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift_no_inv_var) {
437 // f0: b0: 1 2 -10 b1: 0 0 -11
438 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
439 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
440 // f1: b0: 7 8 -16 b1: 12 9 -17
459 const auto& engine = get_test_engine();
461 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
462 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
463 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
465 float epsilon = 0.0001f;
468 topology.add(input_layout("input", input.get_layout()));
469 topology.add(data("scale", scale));
470 topology.add(data("shift", shift));
471 topology.add(batch_norm("batch_norm", "input", epsilon, "scale", "shift"));
476 -10.f, -11.f, -12.f, -13.f,
477 3.f, 0.5f, 7.f, 12.f,
478 4.f, -0.5f, 8.f, 9.f,
479 -14.f, -15.f, -16.f, -17.f
482 set_values(scale, { 2.f, 1.f });
483 set_values(shift, { 0.f, 5.f });
485 network network(engine, topology);
487 network.set_input_data("input", input);
489 auto outputs = network.execute();
491 auto output = outputs.at("batch_norm").get_memory();
492 auto output_ptr = output.pointer<float>();
494 for (int j = 0; j < 2; ++j) { //F
495 float sum = 0, var = 0;
497 auto scalep = scale.pointer<float>();
498 auto shiftp = shift.pointer<float>();
499 float scalef = scalep[j];
500 float shiftf = shiftp[j];
502 for (int i = 0; i < 2; ++i) { //B
503 for (int k = 0; k < 2; ++k) { //Y
504 for (int l = 0; l < 3; ++l) { //X
505 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
506 data = (data - shiftf) / scalef;
515 EXPECT_NEAR(sum, 0, 1e-03F);
516 EXPECT_NEAR(var, 1, 1e-03F);
// Test: full-output variant — mean_out / variance_out / inv_variance are all
// mutable_data side outputs. Verifies both the normalized data (~N(0,1) after
// un-applying scale/shift) and that the computed mean/variance match the
// hand-computed reference values to 1e-3.
520 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs) {
526 // f0: b0: 1 2 -10 b1: 0 0 -11
527 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
528 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
529 // f1: b0: 7 8 -16 b1: 12 9 -17
531 // Mean (to be calculated)
535 // Variance (to be calculated)
548 const auto& engine = get_test_engine();
550 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
551 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
552 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
553 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
554 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
555 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
557 float epsilon = 0.0001f;
560 topology.add(input_layout("input", input.get_layout()));
561 topology.add(data("scale", scale));
562 topology.add(data("shift", shift));
563 topology.add(mutable_data("mean_out", mean_out));
564 topology.add(mutable_data("variance_out", variance_out));
565 topology.add(mutable_data("inv_variance", inv_variance));
566 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
571 -10.f, -11.f, -12.f, -13.f,
572 3.f, 0.5f, 7.f, 12.f,
573 4.f, -0.5f, 8.f, 9.f,
574 -14.f, -15.f, -16.f, -17.f
577 set_values(scale, { 2.f, 1.f });
578 set_values(shift, { 0.f, 5.f });
580 network network(engine, topology);
582 network.set_input_data("input", input);
584 auto outputs = network.execute();
586 auto output = outputs.at("batch_norm").get_memory();
587 auto output_ptr = output.pointer<float>();
// reference per-feature statistics of the 12-element input slices
589 std::vector<float> mean_ref = { -3.3333f, -0.3583f };
590 std::vector<float> val_ref = { 44.9305f, 107.0624f };
592 for (int j = 0; j < 2; ++j) { //F
593 float sum = 0, var = 0;
595 auto scalep = scale.pointer<float>();
596 auto shiftp = shift.pointer<float>();
597 float scalef = scalep[j];
598 float shiftf = shiftp[j];
600 auto meanp = mean_out.pointer<float>();
601 auto varp = variance_out.pointer<float>();
602 float meanf = meanp[j];
603 float varf = varp[j];
605 for (int i = 0; i < 2; ++i) { //B
606 for (int k = 0; k < 2; ++k) { //Y
607 for (int l = 0; l < 3; ++l) { //X
608 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
609 data = (data - shiftf) / scalef;
618 EXPECT_NEAR(sum, 0, 1e-03F);
619 EXPECT_NEAR(var, 1, 1e-03F);
621 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
622 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// Test: same as the with_var_mean_outputs case but without the inv_variance
// side output. Checks normalized data and the mean_out/variance_out values
// against the hand-computed references.
626 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_no_inv_var) {
632 // f0: b0: 1 2 -10 b1: 0 0 -11
633 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
634 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
635 // f1: b0: 7 8 -16 b1: 12 9 -17
637 // Mean (to be calculated)
641 // Variance (to be calculated)
654 const auto& engine = get_test_engine();
656 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
657 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
658 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
659 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
660 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
662 float epsilon = 0.0001f;
665 topology.add(input_layout("input", input.get_layout()));
666 topology.add(data("scale", scale));
667 topology.add(data("shift", shift));
668 topology.add(mutable_data("mean_out", mean_out));
669 topology.add(mutable_data("variance_out", variance_out));
670 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
675 -10.f, -11.f, -12.f, -13.f,
676 3.f, 0.5f, 7.f, 12.f,
677 4.f, -0.5f, 8.f, 9.f,
678 -14.f, -15.f, -16.f, -17.f
681 set_values(scale, { 2.f, 1.f });
682 set_values(shift, { 0.f, 5.f });
684 network network(engine, topology);
686 network.set_input_data("input", input);
688 auto outputs = network.execute();
690 auto output = outputs.at("batch_norm").get_memory();
691 auto output_ptr = output.pointer<float>();
693 std::vector<float> mean_ref = { -3.3333f, -0.3583f };
694 std::vector<float> val_ref = { 44.9305f, 107.0624f };
696 for (int j = 0; j < 2; ++j) { //F
697 float sum = 0, var = 0;
699 auto scalep = scale.pointer<float>();
700 auto shiftp = shift.pointer<float>();
701 float scalef = scalep[j];
702 float shiftf = shiftp[j];
704 auto meanp = mean_out.pointer<float>();
705 auto varp = variance_out.pointer<float>();
706 float meanf = meanp[j];
707 float varf = varp[j];
709 for (int i = 0; i < 2; ++i) { //B
710 for (int k = 0; k < 2; ++k) { //Y
711 for (int l = 0; l < 3; ++l) { //X
712 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
713 data = (data - shiftf) / scalef;
722 EXPECT_NEAR(sum, 0, 1e-03F);
723 EXPECT_NEAR(var, 1, 1e-03F);
725 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
726 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// Negative test: mean_out/variance_out/inv_variance are declared as immutable
// data primitives, so building the network with batch_norm writing to them
// must throw (any exception type accepted).
730 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_error_out_type) {
731 const auto& engine = get_test_engine();
733 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
734 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
735 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
736 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
737 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
738 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
740 float epsilon = 0.0001f;
743 topology.add(input_layout("input", input.get_layout()));
744 topology.add(data("scale", scale));
745 topology.add(data("shift", shift));
746 topology.add(data("mean_out", mean_out));
747 topology.add(data("variance_out", variance_out));
748 topology.add(data("inv_variance", inv_variance));
749 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
751 EXPECT_ANY_THROW(network(engine, topology));
// Negative test: mixed output primitive types — mean_out is immutable data
// while variance_out/inv_variance are mutable_data; network construction
// must throw.
754 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_error_non_equal_types) {
755 const auto& engine = get_test_engine();
757 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
758 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
759 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
760 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
761 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
762 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
764 float epsilon = 0.0001f;
767 topology.add(input_layout("input", input.get_layout()));
768 topology.add(data("scale", scale));
769 topology.add(data("shift", shift));
770 topology.add(data("mean_out", mean_out));
771 topology.add(mutable_data("variance_out", variance_out));
772 topology.add(mutable_data("inv_variance", inv_variance));
773 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
775 EXPECT_ANY_THROW(network(engine, topology));
// Test: same normalization check as basic_in2x3x2x2 but with bfyx layout;
// input values and the output indexing are reordered accordingly.
779 TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx) {
785 // f0: b0: 1 2 -10 b1: 0 0 -11
786 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
787 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
788 // f1: b0: 7 8 -16 b1: 12 9 -17
799 const auto& engine = get_test_engine();
801 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
802 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
803 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
805 float epsilon = 0.0001f;
808 topology.add(input_layout("input", input.get_layout()));
809 topology.add(data("mean", mean));
810 topology.add(data("variance", variance));
811 topology.add(batch_norm("batch_norm", "input", "mean", "variance", epsilon));
814 1.f, 2.f, -10.f, 3.f,
815 4.f, -14.f, 5.f, 6.f,
816 -12.f, 7.f, 8.f, -16.f,
817 0.f, 0.f, -11.f, 0.5f,
818 -0.5f, -15.f, 1.5f, 5.2f,
819 -13.f, 12.f, 9.f, -17.f
822 // f0: b0: 1 2 -10 b1: 0 0 -11
823 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
824 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
825 // f1: b0: 7 8 -16 b1: 12 9 -17
827 set_values(mean, { -3.3333f, -0.3583f });
828 set_values(variance, { 44.9305f, 107.0624f });
830 network network(engine, topology);
832 network.set_input_data("input", input);
834 auto outputs = network.execute();
836 auto output = outputs.at("batch_norm").get_memory();
837 auto output_ptr = output.pointer<float>();
// bfyx addressing: index = x + X*y + X*Y*f + X*Y*F*b (X=3, Y=2, F=2).
839 for (int j = 0; j < 2; ++j) { //F
840 float sum = 0, var = 0;
841 for (int i = 0; i < 2; ++i) { //B
842 for (int k = 0; k < 2; ++k) { //Y
843 for (int l = 0; l < 3; ++l) { //X
844 float data = output_ptr[l + k * 3 + j * 2 * 3 + i * 2 * 2 * 3];
853 EXPECT_NEAR(sum, 0, 1e-03F);
854 EXPECT_NEAR(var, 1, 1e-03F);
// Test: bfyx layout with input padding 1x2 (via reorder) and output padding
// 2x1 on batch_norm itself; the output index arithmetic skips the padded
// border (row pitch 7 = 3 + 2*2, slice height 4 = 2 + 2*1).
858 TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx_padding) {
862 // Input padding : 1x2
863 // Output padding : 2x1
866 // f0: b0: 1 2 -10 b1: 0 0 -11
867 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
868 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
869 // f1: b0: 7 8 -16 b1: 12 9 -17
880 const auto& engine = get_test_engine();
882 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
883 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
884 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
886 float epsilon = 0.0001f;
889 topology.add(input_layout("input", input.get_layout()));
890 topology.add(data("mean", mean));
891 topology.add(data("variance", variance));
892 topology.add(reorder("reorder", "input", input.get_layout().with_padding({ { 0, 0, 1, 2 }, 0 })));
893 topology.add(batch_norm("batch_norm", "reorder", "mean", "variance", epsilon, padding({ 0, 0, 2, 1 }, 0)));
896 1.f, 2.f, -10.f, 3.f,
897 4.f, -14.f, 5.f, 6.f,
898 -12.f, 7.f, 8.f, -16.f,
899 0.f, 0.f, -11.f, 0.5f,
900 -0.5f, -15.f, 1.5f, 5.2f,
901 -13.f, 12.f, 9.f, -17.f
904 // f0: b0: 1 2 -10 b1: 0 0 -11
905 // f0: b0: 3 4 -14 b1: 0.5 -0.5 -15
906 // f1: b0: 5 6 -12 b1: 1.5 5.2 -13
907 // f1: b0: 7 8 -16 b1: 12 9 -17
909 set_values(mean, { -3.3333f, -0.3583f });
910 set_values(variance, { 44.9305f, 107.0624f });
912 network network(engine, topology);
914 network.set_input_data("input", input);
916 auto outputs = network.execute();
918 auto output = outputs.at("batch_norm").get_memory();
919 auto output_ptr = output.pointer<float>();
921 for (int j = 0; j < 2; ++j) { //F
922 float sum = 0, var = 0;
923 for (int i = 0; i < 2; ++i) { //B
924 for (int k = 0; k < 2; ++k) { //Y
925 for (int l = 0; l < 3; ++l) { //X
// +2 / +1 offsets step over the left/top output padding
926 float data = output_ptr[l + 2 + 7 * (k + 1 + 4 * (j + 2 * i))];
935 EXPECT_NEAR(sum, 0, 1e-03F);
936 EXPECT_NEAR(var, 1, 1e-03F);
// Test: instantiates every batch_norm constructor overload (0-7) in one
// topology and checks get_primitive_info() returns a non-empty string for
// each — a smoke test of the primitive's to_string/描述 path.
// NOTE(review): only verifies non-emptiness, not content.
940 TEST(batch_normalization_gpu, basic_to_string) {
941 const auto& engine = get_test_engine();
943 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
945 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
946 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
948 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
949 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
951 auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
953 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
954 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
956 float epsilon = 0.0001f;
959 topology.add(input_layout("input", input.get_layout()));
961 topology.add(data("mean", mean));
962 topology.add(data("variance", variance));
964 topology.add(data("scale", scale));
965 topology.add(data("shift", shift));
967 topology.add(mutable_data("inv_variance", inv_variance));
969 topology.add(mutable_data("mean_out", mean_out));
970 topology.add(mutable_data("variance_out", variance_out));
972 topology.add(batch_norm("batch_norm0", "input", "mean", "variance", epsilon));
973 topology.add(batch_norm("batch_norm1", "input", "mean", "variance", "scale", "shift", epsilon));
974 topology.add(batch_norm("batch_norm2", "input", epsilon));
975 topology.add(batch_norm("batch_norm3", "input", epsilon, "inv_variance"));
976 topology.add(batch_norm("batch_norm4", "input", epsilon, "scale", "shift"));
977 topology.add(batch_norm("batch_norm5", "input", epsilon, "scale", "shift", "inv_variance"));
978 topology.add(batch_norm("batch_norm6", "input", epsilon, "mean_out", "variance_out", "scale", "shift" ));
979 topology.add(batch_norm("batch_norm7", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
981 network network(engine, topology);
983 size_t zero_length = 0;
985 EXPECT_NE(network.get_primitive_info("batch_norm0").length(), zero_length);
986 EXPECT_NE(network.get_primitive_info("batch_norm1").length(), zero_length);
987 EXPECT_NE(network.get_primitive_info("batch_norm2").length(), zero_length);
988 EXPECT_NE(network.get_primitive_info("batch_norm3").length(), zero_length);
989 EXPECT_NE(network.get_primitive_info("batch_norm4").length(), zero_length);
990 EXPECT_NE(network.get_primitive_info("batch_norm5").length(), zero_length);
991 EXPECT_NE(network.get_primitive_info("batch_norm6").length(), zero_length);
992 EXPECT_NE(network.get_primitive_info("batch_norm7").length(), zero_length);
// Test: mean/variance/scale/shift each carry the 2 per-feature values in a
// DIFFERENT tensor dimension (b, f, x, y respectively) — verifies batch_norm
// accepts any 2-element auxiliary shape and still normalizes correctly.
996 TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_scale_shift_different_shapes) {
997 const auto& engine = get_test_engine();
999 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
1000 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
1001 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
1002 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
1003 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
1005 float epsilon = 0.0001f;
1008 topology.add(input_layout("input", input.get_layout()));
1009 topology.add(data("mean", mean));
1010 topology.add(data("variance", variance));
1011 topology.add(data("scale", scale));
1012 topology.add(data("shift", shift));
1013 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
1016 1.f, 0.f, 5.f, 1.5f,
1017 2.f, 0.f, 6.f, 5.2f,
1018 -10.f, -11.f, -12.f, -13.f,
1019 3.f, 0.5f, 7.f, 12.f,
1020 4.f, -0.5f, 8.f, 9.f,
1021 -14.f, -15.f, -16.f, -17.f
1024 set_values(mean, { -3.3333f, -0.3583f });
1025 set_values(variance, { 44.9305f, 107.0624f });
1026 set_values(scale, { 2.f, 1.f });
1027 set_values(shift, { 0.f, 5.f });
1029 network network(engine, topology);
1031 network.set_input_data("input", input);
1033 auto outputs = network.execute();
1035 auto output = outputs.at("batch_norm").get_memory();
1036 auto output_ptr = output.pointer<float>();
1038 for (int j = 0; j < 2; ++j) { //F
1039 float sum = 0, var = 0;
1041 auto scalep = scale.pointer<float>();
1042 auto shiftp = shift.pointer<float>();
1043 float scalef = scalep[j];
1044 float shiftf = shiftp[j];
1046 for (int i = 0; i < 2; ++i) { //B
1047 for (int k = 0; k < 2; ++k) { //Y
1048 for (int l = 0; l < 3; ++l) { //X
1049 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
1050 data = (data - shiftf) / scalef;
1059 EXPECT_NEAR(sum, 0, 1e-03F);
1060 EXPECT_NEAR(var, 1, 1e-03F);
// Test: same differently-shaped auxiliaries as above, but supplied as
// runtime input_layouts (set_input_data) instead of constant data primitives.
1064 TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_scale_shift_different_shapes_input_layouts) {
1065 const auto& engine = get_test_engine();
1067 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
1068 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
1069 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
1070 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
1071 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
1073 float epsilon = 0.0001f;
1076 topology.add(input_layout("input", input.get_layout()));
1077 topology.add(input_layout("mean", mean.get_layout()));
1078 topology.add(input_layout("variance", variance.get_layout()));
1079 topology.add(input_layout("scale", scale.get_layout()));
1080 topology.add(input_layout("shift", shift.get_layout()));
1081 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
1084 1.f, 0.f, 5.f, 1.5f,
1085 2.f, 0.f, 6.f, 5.2f,
1086 -10.f, -11.f, -12.f, -13.f,
1087 3.f, 0.5f, 7.f, 12.f,
1088 4.f, -0.5f, 8.f, 9.f,
1089 -14.f, -15.f, -16.f, -17.f
1092 set_values(mean, { -3.3333f, -0.3583f });
1093 set_values(variance, { 44.9305f, 107.0624f });
1094 set_values(scale, { 2.f, 1.f });
1095 set_values(shift, { 0.f, 5.f });
1097 network network(engine, topology);
// all five tensors are bound at runtime, exercising the non-constant path
1099 network.set_input_data("input", input);
1100 network.set_input_data("mean", mean);
1101 network.set_input_data("variance", variance);
1102 network.set_input_data("scale", scale);
1103 network.set_input_data("shift", shift);
1105 auto outputs = network.execute();
1107 auto output = outputs.at("batch_norm").get_memory();
1108 auto output_ptr = output.pointer<float>();
1110 for (int j = 0; j < 2; ++j) { //F
1111 float sum = 0, var = 0;
1113 auto scalep = scale.pointer<float>();
1114 auto shiftp = shift.pointer<float>();
1115 float scalef = scalep[j];
1116 float shiftf = shiftp[j];
1118 for (int i = 0; i < 2; ++i) { //B
1119 for (int k = 0; k < 2; ++k) { //Y
1120 for (int l = 0; l < 3; ++l) { //X
1121 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
1122 data = (data - shiftf) / scalef;
1131 EXPECT_NEAR(sum, 0, 1e-03F);
1132 EXPECT_NEAR(var, 1, 1e-03F);
// Test: internally computed statistics written to differently-shaped
// mean_out/variance_out mutable_data buffers; verifies normalized output and
// the computed statistics against the hand-computed references.
1136 TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_with_var_mean_outputs_no_inv_var_different_shapes) {
1137 const auto& engine = get_test_engine();
1139 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
1140 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
1141 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
1142 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
1143 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
1145 float epsilon = 0.0001f;
1148 topology.add(input_layout("input", input.get_layout()));
1149 topology.add(data("scale", scale));
1150 topology.add(data("shift", shift));
1151 topology.add(mutable_data("mean_out", mean_out));
1152 topology.add(mutable_data("variance_out", variance_out));
1153 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
1156 1.f, 0.f, 5.f, 1.5f,
1157 2.f, 0.f, 6.f, 5.2f,
1158 -10.f, -11.f, -12.f, -13.f,
1159 3.f, 0.5f, 7.f, 12.f,
1160 4.f, -0.5f, 8.f, 9.f,
1161 -14.f, -15.f, -16.f, -17.f
1164 set_values(scale, { 2.f, 1.f });
1165 set_values(shift, { 0.f, 5.f });
1167 network network(engine, topology);
1169 network.set_input_data("input", input);
1171 auto outputs = network.execute();
1173 auto output = outputs.at("batch_norm").get_memory();
1174 auto output_ptr = output.pointer<float>();
1176 std::vector<float> mean_ref = { -3.3333f, -0.3583f };
1177 std::vector<float> val_ref = { 44.9305f, 107.0624f };
1179 for (int j = 0; j < 2; ++j) { //F
1180 float sum = 0, var = 0;
1182 auto scalep = scale.pointer<float>();
1183 auto shiftp = shift.pointer<float>();
1184 float scalef = scalep[j];
1185 float shiftf = shiftp[j];
1187 auto meanp = mean_out.pointer<float>();
1188 auto varp = variance_out.pointer<float>();
1189 float meanf = meanp[j];
1190 float varf = varp[j];
1192 for (int i = 0; i < 2; ++i) { //B
1193 for (int k = 0; k < 2; ++k) { //Y
1194 for (int l = 0; l < 3; ++l) { //X
1195 float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
1196 data = (data - shiftf) / scalef;
1205 EXPECT_NEAR(sum, 0, 1e-03F);
1206 EXPECT_NEAR(var, 1, 1e-03F);
1208 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
1209 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// Inference-mode batch norm on a byxf 2(b)x2(f)x3(x)x2(y) input with
// externally supplied mean/variance plus per-feature scale and shift.
// The four per-channel tensors all hold 2 elements but deliberately use
// four different shapes, checking that batch_norm accepts any layout
// with the right element count for its parameters.
1213 TEST(batch_normalization_gpu, basic_in2x2x3x2_byxf_scale_shift_different_shapes) {
1214 const auto& engine = get_test_engine();
1216 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 3, 2 } });
// Same element count (2), intentionally different shapes:
1217 auto mean = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 1, 1 } });
1218 auto variance = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 1, 1 } });
1219 auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 1 } });
1220 auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
1222 float epsilon = 0.0001f;
1225 topology.add(input_layout("input", input.get_layout()));
1226 topology.add(data("mean", mean));
1227 topology.add(data("variance", variance));
1228 topology.add(data("scale", scale));
1229 topology.add(data("shift", shift));
// Variant taking precomputed mean/variance plus scale/shift.
1230 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
1233 1.f, 5.f, 2.f, 6.f, -10.f, -12.f,
1234 3.f, 7.f, 4.f, 8.f, -14.f, -16.f,
1235 0.f, 1.5f, 0.f, 5.2f, -11.f, -13.f,
1236 0.5f, 12.f, -0.5f, 9.f, -15.f, -17.f
1239 set_values(mean, { -3.3333f, -0.3583f });
1240 set_values(variance, { 44.9305f, 107.0624f });
1241 set_values(scale, { 2.f, 1.f });
1242 set_values(shift, { 0.f, 5.f });
// Expected values are the normalized input BEFORE scale/shift is applied;
// the check below undoes scale/shift on the actual output first.
1244 std::vector<float> expected_result{
1245 0.646469f, 0.517855f, 0.795655f, 0.614501f, -0.99458f, -1.12512f,
1246 0.944842f, 0.711146f, 1.09403f, 0.807792f, -1.59133f, -1.5117f,
1247 0.497283f, 0.179596f, 0.497283f, 0.537184f, -1.14377f, -1.22176f,
1248 0.571876f, 1.19437f, 0.42269f, 0.904437f, -1.74051f, -1.60834f
1251 network network(engine, topology);
1253 network.set_input_data("input", input);
1255 auto outputs = network.execute();
1257 auto output = outputs.at("batch_norm").get_memory();
1258 auto output_ptr = output.pointer<float>();
1260 for (int j = 0; j < 2; ++j) { //F
1261 float sum = 0, var = 0;
1263 auto scalep = scale.pointer<float>();
1264 auto shiftp = shift.pointer<float>();
1265 float scalef = scalep[j];
1266 float shiftf = shiftp[j];
1268 for (int i = 0; i < 2; ++i) { //B
1269 for (int k = 0; k < 2; ++k) { //Y
1270 for (int l = 0; l < 3; ++l) { //X
// byxf linear offset: ((b*Y + y)*X + x)*F + f.
1271 auto index = 12 * i + 6 * k + 2 * l + j;
1272 float data = output_ptr[index];
// Undo the per-feature affine part to recover the plain normalization.
1273 data = (data - shiftf) / scalef;
1274 EXPECT_NEAR(data, expected_result[index], 1e-3F);
// Normalized output should have ~zero mean and ~unit variance per feature.
1283 EXPECT_NEAR(sum, 0, 1e-03F);
1284 EXPECT_NEAR(var, 1, 1e-03F);
// Training-mode batch norm on the same byxf 2x2x3x2 input: mean and
// variance are COMPUTED by the primitive and written to mutable_data
// outputs ("no_inv_var" — raw variance, not its inverse).  The mean/
// variance/scale/shift tensors again use four different 2-element
// shapes to exercise shape-agnostic parameter handling.
1288 TEST(batch_normalization_gpu, basic_in2x2x3x2_byxf_with_var_mean_outputs_no_inv_var_different_shapes) {
1289 const auto& engine = get_test_engine();
1291 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 3, 2 } });
// Outputs receiving the computed statistics; shapes differ on purpose.
1292 auto mean_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 1, 1 } });
1293 auto variance_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 1, 1 } });
1294 auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 1 } });
1295 auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
1297 float epsilon = 0.0001f;
1300 topology.add(input_layout("input", input.get_layout()));
1301 topology.add(data("scale", scale));
1302 topology.add(data("shift", shift));
// mutable_data so the primitive can write the computed statistics back.
1303 topology.add(mutable_data("mean_out", mean_out));
1304 topology.add(mutable_data("variance_out", variance_out));
// Variant computing mean/variance internally (epsilon passed second).
1305 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
1308 1.f, 5.f, 2.f, 6.f, -10.f, -12.f,
1309 3.f, 7.f, 4.f, 8.f, -14.f, -16.f,
1310 0.f, 1.5f, 0.f, 5.2f, -11.f, -13.f,
1311 0.5f, 12.f, -0.5f, 9.f, -15.f, -17.f
1314 set_values(scale, { 2.f, 1.f });
1315 set_values(shift, { 0.f, 5.f });
1317 network network(engine, topology);
1319 network.set_input_data("input", input);
1321 auto outputs = network.execute();
1323 auto output = outputs.at("batch_norm").get_memory();
1324 auto output_ptr = output.pointer<float>();
// Reference statistics the primitive is expected to produce.
1326 std::vector<float> mean_ref = { -3.3333f, -0.3583f };
1327 std::vector<float> val_ref = { 44.9305f, 107.0624f };
// Expected normalized values (before scale/shift is applied).
1329 std::vector<float> expected_result{
1330 0.646469f, 0.517855f, 0.795655f, 0.614501f, -0.99458f, -1.12512f,
1331 0.944842f, 0.711146f, 1.09403f, 0.807792f, -1.59133f, -1.5117f,
1332 0.497283f, 0.179596f, 0.497283f, 0.537184f, -1.14377f, -1.22176f,
1333 0.571876f, 1.19437f, 0.42269f, 0.904437f, -1.74051f, -1.60834f
1336 for (int j = 0; j < 2; ++j) { //F
1337 float sum = 0, var = 0;
1339 auto scalep = scale.pointer<float>();
1340 auto shiftp = shift.pointer<float>();
1341 float scalef = scalep[j];
1342 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
1344 auto meanp = mean_out.pointer<float>();
1345 auto varp = variance_out.pointer<float>();
1346 float meanf = meanp[j];
1347 float varf = varp[j];
1349 for (int i = 0; i < 2; ++i) { //B
1350 for (int k = 0; k < 2; ++k) { //Y
1351 for (int l = 0; l < 3; ++l) { //X
// byxf linear offset: ((b*Y + y)*X + x)*F + f.
1352 auto index = 12 * i + 6 * k + 2 * l + j;
1353 float data = output_ptr[index];
// Undo per-feature scale/shift before comparing to the plain normalization.
1354 data = (data - shiftf) / scalef;
1355 EXPECT_NEAR(data, expected_result[index], 1e-3F);
1364 EXPECT_NEAR(sum, 0, 1e-03F);
1365 EXPECT_NEAR(var, 1, 1e-03F);
// Computed statistics must match the precomputed references.
1367 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
1368 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// Inference-mode batch norm on a yxfb 2(b)x5(f)x3(x)x2(y) input with
// provided mean/variance and per-feature scale/shift.  Five-feature
// variant of the different-shapes test: each parameter tensor holds 5
// elements in a different 5-element shape.
1373 TEST(batch_normalization_gpu, basic_in2x3x5x2_yxfb_scale_shift_different_shapes) {
1374 const auto& engine = get_test_engine();
1376 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 5, 3, 2 } });
// Same element count (5), intentionally different shapes:
1377 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 5, 1, 1, 1 } });
1378 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 1 } });
1379 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 1 } });
1380 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 5 } });
1382 float epsilon = 0.0001f;
1385 topology.add(input_layout("input", input.get_layout()));
1386 topology.add(data("mean", mean));
1387 topology.add(data("variance", variance));
1388 topology.add(data("scale", scale));
1389 topology.add(data("shift", shift));
// Variant taking precomputed mean/variance plus scale/shift.
1390 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
1436 set_values(mean, { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f });
1437 set_values(variance, { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f });
1438 set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
1439 set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
// Expected normalized values (before scale/shift), laid out in yxfb order.
1441 std::vector<float> expected_result{
1442 0.646469f, 0.497283f,
1443 0.517855f, 0.179596f,
1444 0.646469f, 0.497283f,
1445 0.517855f, 0.179596f,
1446 0.646469f, 0.497283f,
1448 0.795655f, 0.497283f,
1449 0.614501f, 0.537184f,
1450 0.795655f, 0.497283f,
1451 0.614501f, 0.537184f,
1452 0.795655f, 0.497283f,
1454 -0.99458f, -1.14377f,
1455 -1.12512f, -1.22176f,
1456 -0.99458f, -1.14377f,
1457 -1.12512f, -1.22176f,
1458 -0.99458f, -1.14377f,
1460 0.944842f, 0.571876f,
1461 0.711146f, 1.19437f,
1462 0.944842f, 0.571876f,
1463 0.711146f, 1.19437f,
1464 0.944842f, 0.571876f,
1467 0.807792f, 0.904437f,
1469 0.807792f, 0.904437f,
1472 -1.59133f, -1.74051f,
1473 -1.5117f, -1.60834f,
1474 -1.59133f, -1.74051f,
1475 -1.5117f, -1.60834f,
1476 -1.59133f, -1.74051f
1479 network network(engine, topology);
1481 network.set_input_data("input", input);
1483 auto outputs = network.execute();
1485 auto output = outputs.at("batch_norm").get_memory();
1486 auto output_ptr = output.pointer<float>();
1488 for (int j = 0; j < 5; ++j) { //F
1489 float sum = 0, var = 0;
1491 auto scalep = scale.pointer<float>();
1492 auto shiftp = shift.pointer<float>();
1493 float scalef = scalep[j];
1494 float shiftf = shiftp[j];
1496 for (int i = 0; i < 2; ++i) { //B
1497 for (int k = 0; k < 2; ++k) { //Y
1498 for (int l = 0; l < 3; ++l) { //X
// yxfb linear offset: ((y*X + x)*F + f)*B + b.
1499 int index = 30 * k + 10 * l + 2 * j + i;
1500 float data = output_ptr[index];
// Undo per-feature scale/shift before comparing to plain normalization.
1501 data = (data - shiftf) / scalef;
1502 EXPECT_NEAR(data, expected_result[index], 1e-3F);
// Normalized output should have ~zero mean and ~unit variance per feature.
1511 EXPECT_NEAR(sum, 0, 1e-03F);
1512 EXPECT_NEAR(var, 1, 1e-03F);
// Training-mode counterpart of the yxfb 5-feature test above: the
// primitive computes mean/variance itself and writes them to
// mutable_data outputs of deliberately different 5-element shapes
// ("no_inv_var" — raw variance is stored, not its inverse).
1516 TEST(batch_normalization_gpu, basic_in2x3x5x2_yxfb_with_var_mean_outputs_no_inv_var_different_shapes) {
1517 const auto& engine = get_test_engine();
1519 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 5, 3, 2 } });
// Outputs for the computed statistics; shapes differ on purpose.
1520 auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 5, 1, 1, 1 } });
1521 auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 1 } });
1522 auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 1 } });
1523 auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 5 } });
1525 float epsilon = 0.0001f;
1528 topology.add(input_layout("input", input.get_layout()));
1529 topology.add(data("scale", scale));
1530 topology.add(data("shift", shift));
// mutable_data so the primitive can write the computed statistics back.
1531 topology.add(mutable_data("mean_out", mean_out));
1532 topology.add(mutable_data("variance_out", variance_out));
// Variant computing mean/variance internally (epsilon passed second).
1533 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
1579 set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
1580 set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
// Expected normalized values (before scale/shift), laid out in yxfb order.
1582 std::vector<float> expected_result{
1583 0.646469f, 0.497283f,
1584 0.517855f, 0.179596f,
1585 0.646469f, 0.497283f,
1586 0.517855f, 0.179596f,
1587 0.646469f, 0.497283f,
1589 0.795655f, 0.497283f,
1590 0.614501f, 0.537184f,
1591 0.795655f, 0.497283f,
1592 0.614501f, 0.537184f,
1593 0.795655f, 0.497283f,
1595 -0.99458f, -1.14377f,
1596 -1.12512f, -1.22176f,
1597 -0.99458f, -1.14377f,
1598 -1.12512f, -1.22176f,
1599 -0.99458f, -1.14377f,
1601 0.944842f, 0.571876f,
1602 0.711146f, 1.19437f,
1603 0.944842f, 0.571876f,
1604 0.711146f, 1.19437f,
1605 0.944842f, 0.571876f,
1608 0.807792f, 0.904437f,
1610 0.807792f, 0.904437f,
1613 -1.59133f, -1.74051f,
1614 -1.5117f, -1.60834f,
1615 -1.59133f, -1.74051f,
1616 -1.5117f, -1.60834f,
1617 -1.59133f, -1.74051f
1620 network network(engine, topology);
1622 network.set_input_data("input", input);
1624 auto outputs = network.execute();
1626 auto output = outputs.at("batch_norm").get_memory();
1627 auto output_ptr = output.pointer<float>();
// Reference statistics the primitive is expected to produce.
1629 std::vector<float> mean_ref = { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f };
1630 std::vector<float> val_ref = { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f };
1632 for (int j = 0; j < 5; ++j) { //F
1633 float sum = 0, var = 0;
1635 auto scalep = scale.pointer<float>();
1636 auto shiftp = shift.pointer<float>();
1637 float scalef = scalep[j];
1638 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
1640 auto meanp = mean_out.pointer<float>();
1641 auto varp = variance_out.pointer<float>();
1642 float meanf = meanp[j];
1643 float varf = varp[j];
1645 for (int i = 0; i < 2; ++i) { //B
1646 for (int k = 0; k < 2; ++k) { //Y
1647 for (int l = 0; l < 3; ++l) { //X
// yxfb linear offset: ((y*X + x)*F + f)*B + b.
1648 int index = 30 * k + 10 * l + 2 * j + i;
1649 float data = output_ptr[index];
// Undo per-feature scale/shift before comparing to plain normalization.
1650 data = (data - shiftf) / scalef;
1651 EXPECT_NEAR(data, expected_result[index], 1e-3F);
1660 EXPECT_NEAR(sum, 0, 1e-03F);
1661 EXPECT_NEAR(var, 1, 1e-03F);
// Computed statistics must match the precomputed references.
1663 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
1664 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// Inference-mode batch norm on a byxf 2(b)x5(f)x3(x)x2(y) input with
// provided mean/variance and per-feature scale/shift.  byxf 5-feature
// variant of the different-shapes test: the parameter tensors all hold
// 5 elements in four different shapes.
1668 TEST(batch_normalization_gpu, basic_in2x2x3x5_byxf_scale_shift_different_shapes) {
1669 const auto& engine = get_test_engine();
1671 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 5, 3, 2 } });
// Same element count (5), intentionally different shapes:
1672 auto mean = memory::allocate(engine, { data_types::f32, format::byxf,{ 5, 1, 1, 1 } });
1673 auto variance = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 5, 1, 1 } });
1674 auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 5, 1 } });
1675 auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 5 } });
1677 float epsilon = 0.0001f;
1680 topology.add(input_layout("input", input.get_layout()));
1681 topology.add(data("mean", mean));
1682 topology.add(data("variance", variance));
1683 topology.add(data("scale", scale));
1684 topology.add(data("shift", shift));
// Variant taking precomputed mean/variance plus scale/shift.
1685 topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
// Input data in byxf order: 5 feature values per (b, y, x) position.
1689 1.f, 5.f, 1.f, 5.f, 1.f, // x0
1690 2.f, 6.f, 2.f, 6.f, 2.f, // x1
1691 -10.f, -12.f, -10.f, -12.f, -10.f, //x2
1694 3.f, 7.f, 3.f, 7.f, 3.f,
1695 4.f, 8.f, 4.f, 8.f, 4.f,
1696 -14.f, -16.f, -14.f, -16.f, -14.f,
1699 0.f, 1.5f, 0.f, 1.5f, 0.f,
1700 0.f, 5.2f, 0.f, 5.2f, 0.f,
1701 -11.f, -13.f, -11.f, -13.f, -11.f,
1704 0.5f, 12.f, 0.5f, 12.f, 0.5f,
1705 -0.5f, 9.f, -0.5f, 9.f, -0.5f,
1706 -15.f, -17.f, -15.f, -17.f, -15.f
1709 set_values(mean, { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f });
1710 set_values(variance, { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f });
1711 set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
1712 set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
// Expected normalized values (before scale/shift), laid out in byxf order.
1714 std::vector<float> expected_result{
1715 0.646469f, 0.517855f, 0.646469f, 0.517855f, 0.646469f,
1716 0.795655f, 0.614501f, 0.795655f, 0.614501f, 0.795655f,
1717 -0.99458f, -1.12512f, -0.99458f, -1.12512f, -0.99458f,
1719 0.944842f, 0.711146f, 0.944842f, 0.711146f, 0.944842f,
1720 1.09403f, 0.807792f, 1.09403f, 0.807792f, 1.09403f,
1721 -1.59133f, -1.5117f, -1.59133f, -1.5117f, -1.59133f,
1723 0.497283f, 0.179596f, 0.497283f, 0.179596f, 0.497283f,
1724 0.497283f, 0.537184f, 0.497283f, 0.537184f, 0.497283f,
1725 -1.14377f, -1.22176f, -1.14377f, -1.22176f, -1.14377f,
1727 0.571876f, 1.19437f, 0.571876f, 1.19437f, 0.571876f,
1728 0.42269f, 0.904437f, 0.42269f, 0.904437f, 0.42269f,
1729 -1.74051f, -1.60834f, -1.74051f, -1.60834f, -1.74051f
1732 network network(engine, topology);
1734 network.set_input_data("input", input);
1736 auto outputs = network.execute();
1738 auto output = outputs.at("batch_norm").get_memory();
1739 auto output_ptr = output.pointer<float>();
1741 for (int j = 0; j < 5; ++j) { //F
1742 float sum = 0, var = 0;
1744 auto scalep = scale.pointer<float>();
1745 auto shiftp = shift.pointer<float>();
1746 float scalef = scalep[j];
1747 float shiftf = shiftp[j];
1749 for (int i = 0; i < 2; ++i) { //B
1750 for (int k = 0; k < 2; ++k) { //Y
1751 for (int l = 0; l < 3; ++l) { //X
// byxf linear offset: ((b*Y + y)*X + x)*F + f.
1752 auto index = 30 * i + 15 * k + 5 * l + j;
1753 float data = output_ptr[index];
// Undo per-feature scale/shift before comparing to plain normalization.
1754 data = (data - shiftf) / scalef;
1755 EXPECT_NEAR(data, expected_result[index], 1e-3F);
// Normalized output should have ~zero mean and ~unit variance per feature.
1764 EXPECT_NEAR(sum, 0, 1e-03F);
1765 EXPECT_NEAR(var, 1, 1e-03F);
// Training-mode counterpart of the byxf 5-feature test above: mean and
// variance are computed by the primitive and written to mutable_data
// outputs of deliberately different 5-element shapes ("no_inv_var" —
// raw variance, not its inverse).
1769 TEST(batch_normalization_gpu, basic_in2x2x3x5_byxf_with_var_mean_outputs_no_inv_var_different_shapes) {
1770 const auto& engine = get_test_engine();
1772 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 5, 3, 2 } });
// Outputs for the computed statistics; shapes differ on purpose.
1773 auto mean_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 5, 1, 1, 1 } });
1774 auto variance_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 5, 1, 1 } });
1775 auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 5, 1 } });
1776 auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 5 } });
1778 float epsilon = 0.0001f;
1781 topology.add(input_layout("input", input.get_layout()));
1782 topology.add(data("scale", scale));
1783 topology.add(data("shift", shift));
// mutable_data so the primitive can write the computed statistics back.
1784 topology.add(mutable_data("mean_out", mean_out));
1785 topology.add(mutable_data("variance_out", variance_out));
// Variant computing mean/variance internally (epsilon passed second).
1786 topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"))
;
// Input data in byxf order: 5 feature values per (b, y, x) position.
1790 1.f, 5.f, 1.f, 5.f, 1.f, // x0
1791 2.f, 6.f, 2.f, 6.f, 2.f, // x1
1792 -10.f, -12.f, -10.f, -12.f, -10.f, //x2
1795 3.f, 7.f, 3.f, 7.f, 3.f,
1796 4.f, 8.f, 4.f, 8.f, 4.f,
1797 -14.f, -16.f, -14.f, -16.f, -14.f,
1800 0.f, 1.5f, 0.f, 1.5f, 0.f,
1801 0.f, 5.2f, 0.f, 5.2f, 0.f,
1802 -11.f, -13.f, -11.f, -13.f, -11.f,
1805 0.5f, 12.f, 0.5f, 12.f, 0.5f,
1806 -0.5f, 9.f, -0.5f, 9.f, -0.5f,
1807 -15.f, -17.f, -15.f, -17.f, -15.f
1810 set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
1811 set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
1813 network network(engine, topology);
1815 network.set_input_data("input", input);
1817 auto outputs = network.execute();
1819 auto output = outputs.at("batch_norm").get_memory();
1820 auto output_ptr = output.pointer<float>();
// Reference statistics the primitive is expected to produce.
1822 std::vector<float> mean_ref = { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f };
1823 std::vector<float> val_ref = { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f };
// Expected normalized values (before scale/shift), laid out in byxf order.
1825 std::vector<float> expected_result{
1826 0.646469f, 0.517855f, 0.646469f, 0.517855f, 0.646469f,
1827 0.795655f, 0.614501f, 0.795655f, 0.614501f, 0.795655f,
1828 -0.99458f, -1.12512f, -0.99458f, -1.12512f, -0.99458f,
1830 0.944842f, 0.711146f, 0.944842f, 0.711146f, 0.944842f,
1831 1.09403f, 0.807792f, 1.09403f, 0.807792f, 1.09403f,
1832 -1.59133f, -1.5117f, -1.59133f, -1.5117f, -1.59133f,
1834 0.497283f, 0.179596f, 0.497283f, 0.179596f, 0.497283f,
1835 0.497283f, 0.537184f, 0.497283f, 0.537184f, 0.497283f,
1836 -1.14377f, -1.22176f, -1.14377f, -1.22176f, -1.14377f,
1838 0.571876f, 1.19437f, 0.571876f, 1.19437f, 0.571876f,
1839 0.42269f, 0.904437f, 0.42269f, 0.904437f, 0.42269f,
1840 -1.74051f, -1.60834f, -1.74051f, -1.60834f, -1.74051f
1843 for (int j = 0; j < 5; ++j) { //F
1844 float sum = 0, var = 0;
1846 auto scalep = scale.pointer<float>();
1847 auto shiftp = shift.pointer<float>();
1848 float scalef = scalep[j];
1849 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
1851 auto meanp = mean_out.pointer<float>();
1852 auto varp = variance_out.pointer<float>();
1853 float meanf = meanp[j];
1854 float varf = varp[j];
1856 for (int i = 0; i < 2; ++i) { //B
1857 for (int k = 0; k < 2; ++k) { //Y
1858 for (int l = 0; l < 3; ++l) { //X
// byxf linear offset: ((b*Y + y)*X + x)*F + f.
1859 auto index = 30 * i + 15 * k + 5 * l + j;
1860 float data = output_ptr[index];
// Undo per-feature scale/shift before comparing to plain normalization.
1861 data = (data - shiftf) / scalef;
1862 EXPECT_NEAR(data, expected_result[index], 1e-3F);
1871 EXPECT_NEAR(sum, 0, 1e-03F);
1872 EXPECT_NEAR(var, 1, 1e-03F);
// Computed statistics must match the precomputed references.
1874 EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
1875 EXPECT_NEAR(varf, val_ref[j], 1e-03F);
// ngraph-style batch norm forward training pass on a bfyx 1x2x2x2
// input: gamma/beta are constant inputs, mean/variance are computed by
// the primitive and written into mutable_data buffers, then compared
// against ngraph reference values.
1880 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b1c2h2w2)
1882 const auto& engine = get_test_engine();
1884 tensor input_shape = { 1, 2, 2, 2 };
1885 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Per-channel (feature) statistic/parameter tensors, 2 features each.
1886 tensor mean_shape = { feature(2) };
1887 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
1888 tensor var_shape = { feature(2) };
1889 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
1890 tensor gamma_shape = { feature(2) };
1891 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
1892 tensor beta_shape = { feature(2) };
1893 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
1898 topology.add(input_layout("input", input.get_layout()));
1899 topology.add(data("gamma", gamma));
1900 topology.add(data("beta", beta));
// mutable_data: the primitive writes the computed statistics here.
1901 topology.add(mutable_data("mean", mean));
1902 topology.add(mutable_data("variance", variance))
;
// Training variant: eps first, then statistic outputs and gamma/beta.
1903 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
1905 set_values<float>(input, {
// Identity affine (gamma=1, beta=0) so the output is pure normalization.
1917 set_values<float>(gamma, { 1.f, 1.f });
1918 set_values<float>(beta, { 0.f, 0.f });
1920 std::vector<float> expected_result {
// ngraph reference statistics for this input.
1932 std::vector<float> expected_mean = { 0.602912f, 0.599727f };
1933 std::vector<float> expected_variance = { 0.00472505f, 0.0361782f };
1935 network network(engine, topology);
1937 network.set_input_data("input", input);
1939 auto outputs = network.execute();
1941 auto output = outputs.at("batch_norm").get_memory();
1942 auto output_ptr = output.pointer<float>();
1944 for (int j = 0; j < 2; ++j) { //F
1947 auto scalep = gamma.pointer<float>();
1948 auto shiftp = beta.pointer<float>();
1949 float scalef = scalep[j];
1950 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
1952 auto meanp = mean.pointer<float>();
1953 auto varp = variance.pointer<float>();
1954 float meanf = meanp[j];
1955 float varf = varp[j];
1957 for (int k = 0; k < 2; ++k) { //Y
1958 for (int l = 0; l < 2; ++l) { //X
// bfyx linear offset with b fixed to 0: (f*Y + y)*X + x.
1959 int index = 4 * j + 2 * k + l;
1960 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
1961 data = (data - shiftf) / scalef;
1962 EXPECT_NEAR(data, expected_result[index], 1e-5F);
1969 EXPECT_NEAR(sum, 0, 1e-5F);
// Computed statistics must match the ngraph references.
1971 EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
1972 EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
// ngraph-style batch norm forward training pass on a bfyx 2x2x1x2
// input (batch of 2): primitive computes mean/variance into
// mutable_data buffers; gamma=1/beta=0 keeps the output equal to the
// plain normalization.
1976 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1)
1978 const auto& engine = get_test_engine();
1980 tensor input_shape = { 2, 2, 1, 2 };
1981 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Per-channel (feature) statistic/parameter tensors, 2 features each.
1982 tensor mean_shape = { feature(2) };
1983 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
1984 tensor var_shape = { feature(2) };
1985 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
1986 tensor gamma_shape = { feature(2) };
1987 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
1988 tensor beta_shape = { feature(2) };
1989 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
1994 topology.add(input_layout("input", input.get_layout()));
1995 topology.add(data("gamma", gamma));
1996 topology.add(data("beta", beta));
// mutable_data: the primitive writes the computed statistics here.
1997 topology.add(mutable_data("mean", mean));
1998 topology.add(mutable_data("variance", variance));
// Training variant: eps first, then statistic outputs and gamma/beta.
1999 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2002 set_values<float>(input, {
// Identity affine (gamma=1, beta=0) so the output is pure normalization.
2018 set_values<float>(gamma, { 1.f, 1.f });
2019 set_values<float>(beta, { 0.f, 0.f });
2021 std::vector<float> expected_result{
// ngraph reference statistics for this input.
2036 std::vector<float> expected_mean = { 0.583388f, 0.619252f };
2037 std::vector<float> expected_variance = { 0.0119972f, 0.0282681f };
2038 network network(engine, topology);
2040 network.set_input_data("input", input);
2042 auto outputs = network.execute();
2044 auto output = outputs.at("batch_norm").get_memory();
2045 auto output_ptr = output.pointer<float>();
2047 for (int j = 0; j < 2; ++j) { //F
2050 auto scalep = gamma.pointer<float>();
2051 auto shiftp = beta.pointer<float>();
2052 float scalef = scalep[j];
2053 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
2055 auto meanp = mean.pointer<float>();
2056 auto varp = variance.pointer<float>();
2057 float meanf = meanp[j];
2058 float varf = varp[j];
2060 for (int k = 0; k < 2; ++k) { //B
2061 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y).
2062 int index = 4 * k + 2 * j + l;
2063 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2064 data = (data - shiftf) / scalef;
2065 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2072 EXPECT_NEAR(sum, 0, 1e-5F);
// Computed statistics must match the ngraph references.
2074 EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
2075 EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
// ngraph-style batch norm forward INFERENCE pass on a bfyx 2x2x1x2
// input: mean/variance are constant inputs (the statistics produced by
// the matching training test above), so nothing is written back and
// only the normalized output is checked.
2079 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1)
2081 const auto& engine = get_test_engine();
2083 tensor input_shape = { 2, 2, 1, 2 };
2084 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Per-channel (feature) statistic/parameter tensors, 2 features each.
2085 tensor mean_shape = { feature(2) };
2086 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2087 tensor var_shape = { feature(2) };
2088 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2089 tensor gamma_shape = { feature(2) };
2090 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2091 tensor beta_shape = { feature(2) };
2092 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2097 topology.add(input_layout("input", input.get_layout()));
2098 topology.add(data("gamma", gamma));
2099 topology.add(data("beta", beta));
// Plain data (not mutable_data): statistics are inputs here.
2100 topology.add(data("mean", mean));
2101 topology.add(data("variance", variance));
2102 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2105 set_values<float>(input, {
// Identity affine (gamma=1, beta=0) so the output is pure normalization.
2121 set_values<float>(gamma, { 1.f, 1.f });
2122 set_values<float>(beta, { 0.f, 0.f });
// Statistics from the training variant of this test.
2124 set_values<float>(mean, { 0.583388f, 0.619252f });
2125 set_values<float>(variance, { 0.0119972f, 0.0282681f });
2127 std::vector<float> expected_result{
2141 network network(engine, topology);
2143 network.set_input_data("input", input);
2145 auto outputs = network.execute();
2147 auto output = outputs.at("batch_norm").get_memory();
2148 auto output_ptr = output.pointer<float>();
2150 for (int j = 0; j < 2; ++j) { //F
2153 auto scalep = gamma.pointer<float>();
2154 auto shiftp = beta.pointer<float>();
2155 float scalef = scalep[j];
2156 float shiftf = shiftp[j];
2158 for (int k = 0; k < 2; ++k) { //B
2159 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y).
2160 int index = 4 * k + 2 * j + l;
2161 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2162 data = (data - shiftf) / scalef;
2163 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2170 EXPECT_NEAR(sum, 0, 1e-5F);
// Same training-pass scenario as batchnorm_fprop_b2c2h2w1, but the
// mean/variance/gamma/beta tensors use four different 2-element shapes
// and gamma/beta are non-trivial ({2,3}/{5,10}), so the affine part is
// actually exercised and undone during verification.
2174 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1_different_shapes)
2176 const auto& engine = get_test_engine();
2178 tensor input_shape = { 2, 2, 1, 2 };
2179 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Same element count (2), intentionally different shapes:
2180 tensor mean_shape = { 2, 1, 1, 1 };
2181 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2182 tensor var_shape = { 1, 2, 1, 1 };
2183 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2184 tensor gamma_shape = { 1, 1, 2, 1 };
2185 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2186 tensor beta_shape = { 1, 1, 1, 2 };
2187 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2192 topology.add(input_layout("input", input.get_layout()));
2193 topology.add(data("gamma", gamma));
2194 topology.add(data("beta", beta));
// mutable_data: the primitive writes the computed statistics here.
2195 topology.add(mutable_data("mean", mean));
2196 topology.add(mutable_data("variance", variance));
2197 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2200 set_values<float>(input, {
// Non-trivial affine parameters, undone in the verification loop.
2216 set_values<float>(gamma, { 2.f, 3.f });
2217 set_values<float>(beta, { 5.f, 10.f });
2219 std::vector<float> expected_result{
// ngraph reference statistics for this input.
2234 std::vector<float> expected_mean = { 0.583388f, 0.619252f };
2235 std::vector<float> expected_variance = { 0.0119972f, 0.0282681f };
2236 network network(engine, topology);
2238 network.set_input_data("input", input);
2240 auto outputs = network.execute();
2242 auto output = outputs.at("batch_norm").get_memory();
2243 auto output_ptr = output.pointer<float>();
2245 for (int j = 0; j < 2; ++j) { //F
2248 auto scalep = gamma.pointer<float>();
2249 auto shiftp = beta.pointer<float>();
2250 float scalef = scalep[j];
2251 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
2253 auto meanp = mean.pointer<float>();
2254 auto varp = variance.pointer<float>();
2255 float meanf = meanp[j];
2256 float varf = varp[j];
2258 for (int k = 0; k < 2; ++k) { //B
2259 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y).
2260 int index = 4 * k + 2 * j + l;
2261 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2262 data = (data - shiftf) / scalef;
2263 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2270 EXPECT_NEAR(sum, 0, 1e-5F);
// Computed statistics must match the ngraph references.
2272 EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
2273 EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
// Inference-pass counterpart of the different-shapes training test
// above: mean/variance are constant inputs (shapes again deliberately
// mismatched) and only the normalized output is checked.
2277 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1_different_shapes)
2279 const auto& engine = get_test_engine();
2281 tensor input_shape = { 2, 2, 1, 2 };
2282 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Same element count (2), intentionally different shapes:
2283 tensor mean_shape = { 2, 1, 1, 1 };
2284 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2285 tensor var_shape = { 1, 1, 2, 1 };
2286 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2287 tensor gamma_shape = { 1, 1, 2, 1 };
2288 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2289 tensor beta_shape = { 1, 1, 1, 2 };
2290 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2295 topology.add(input_layout("input", input.get_layout()));
2296 topology.add(data("gamma", gamma));
2297 topology.add(data("beta", beta));
// Plain data (not mutable_data): statistics are inputs here.
2298 topology.add(data("mean", mean));
2299 topology.add(data("variance", variance));
2300 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2303 set_values<float>(input, {
// Non-trivial affine parameters, undone in the verification loop.
2319 set_values<float>(gamma, { 2.f, 3.f });
2320 set_values<float>(beta, { 5.f, 10.f });
// Statistics from the training variant of this test.
2322 set_values<float>(mean, { 0.583388f, 0.619252f });
2323 set_values<float>(variance, { 0.0119972f, 0.0282681f });
2325 std::vector<float> expected_result{
2339 network network(engine, topology);
2341 network.set_input_data("input", input);
2343 auto outputs = network.execute();
2345 auto output = outputs.at("batch_norm").get_memory();
2346 auto output_ptr = output.pointer<float>();
2348 for (int j = 0; j < 2; ++j) { //F
2351 auto scalep = gamma.pointer<float>();
2352 auto shiftp = beta.pointer<float>();
2353 float scalef = scalep[j];
2354 float shiftf = shiftp[j];
2356 for (int k = 0; k < 2; ++k) { //B
2357 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y).
2358 int index = 4 * k + 2 * j + l;
2359 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2360 data = (data - shiftf) / scalef;
2361 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2368 EXPECT_NEAR(sum, 0, 1e-5F);
// 5-channel version of the different-shapes training test: bfyx
// 2x5x1x2 input, mean/variance computed into mutable_data outputs of
// deliberately mismatched 5-element shapes, non-trivial gamma/beta.
2372 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c5h2w1_different_shapes)
2374 const auto& engine = get_test_engine();
2376 tensor input_shape = { 2, 5, 1, 2 };
2377 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Same element count (5), intentionally different shapes:
2378 tensor mean_shape = { 5, 1, 1, 1 };
2379 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2380 tensor var_shape = { 1, 5, 1, 1 };
2381 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2382 tensor gamma_shape = { 1, 1, 5, 1 };
2383 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2384 tensor beta_shape = { 1, 1, 1, 5 };
2385 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2390 topology.add(input_layout("input", input.get_layout()));
2391 topology.add(data("gamma", gamma));
2392 topology.add(data("beta", beta));
// mutable_data: the primitive writes the computed statistics here.
2393 topology.add(mutable_data("mean", mean));
2394 topology.add(mutable_data("variance", variance));
2395 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2398 set_values<float>(input, {
// Non-trivial per-channel affine parameters, undone during verification.
2432 set_values<float>(gamma, { 2.f, 3.f, 4.f, 5.f, 1.f });
2433 set_values<float>(beta, { 5.f, 10.f, -10.f, -15.f, 0.f });
2435 std::vector<float> expected_result{
// ngraph reference statistics for this input.
2470 std::vector<float> expected_mean = { 0.583388f, 0.619252f, 0.583388f, 0.619252f, 0.583388f };
2471 std::vector<float> expected_variance = { 0.0119972f, 0.0282681f, 0.0119972f, 0.0282681f, 0.0119972f };
2472 network network(engine, topology);
2474 network.set_input_data("input", input);
2476 auto outputs = network.execute();
2478 auto output = outputs.at("batch_norm").get_memory();
2479 auto output_ptr = output.pointer<float>();
2481 for (int j = 0; j < 5; ++j) { //F
2484 auto scalep = gamma.pointer<float>();
2485 auto shiftp = beta.pointer<float>();
2486 float scalef = scalep[j];
2487 float shiftf = shiftp[j];
// Read back the statistics the primitive wrote.
2489 auto meanp = mean.pointer<float>();
2490 auto varp = variance.pointer<float>();
2491 float meanf = meanp[j];
2492 float varf = varp[j];
2494 for (int k = 0; k < 2; ++k) { //B
2495 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y), F == 5.
2496 int index = 10 * k + 2 * j + l;
2497 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2498 data = (data - shiftf) / scalef;
2499 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2506 EXPECT_NEAR(sum, 0, 1e-5F);
// Computed statistics must match the ngraph references.
2508 EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
2509 EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
// Inference-pass counterpart of the 5-channel different-shapes test:
// mean/variance are constant inputs (shapes again mismatched) and only
// the normalized output is checked.
2513 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c5h2w1_different_shapes)
2515 const auto& engine = get_test_engine();
2517 tensor input_shape = { 2, 5, 1, 2 };
2518 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
// Same element count (5), intentionally different shapes:
2519 tensor mean_shape = { 5, 1, 1, 1 };
2520 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2521 tensor var_shape = { 1, 5, 1, 1 };
2522 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2523 tensor gamma_shape = { 1, 1, 5, 1 };
2524 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2525 tensor beta_shape = { 1, 1, 1, 5 };
2526 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2531 topology.add(input_layout("input", input.get_layout()));
2532 topology.add(data("gamma", gamma));
2533 topology.add(data("beta", beta));
// Plain data (not mutable_data): statistics are inputs here.
2534 topology.add(data("mean", mean));
2535 topology.add(data("variance", variance));
2536 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2539 set_values<float>(input, {
// Non-trivial per-channel affine parameters, undone during verification.
2573 set_values<float>(gamma, { 2.f, 3.f, 4.f, 5.f, 1.f });
2574 set_values<float>(beta, { 5.f, 10.f, -10.f, -15.f, 0.f });
2576 std::vector<float> expected_result{
// Statistics from the training variant of this test.
2611 set_values<float>(mean, { 0.583388f, 0.619252f, 0.583388f, 0.619252f, 0.583388f });
2612 set_values<float>(variance, { 0.0119972f, 0.0282681f, 0.0119972f, 0.0282681f, 0.0119972f });
2613 network network(engine, topology);
2615 network.set_input_data("input", input);
2617 auto outputs = network.execute();
2619 auto output = outputs.at("batch_norm").get_memory();
2620 auto output_ptr = output.pointer<float>();
2622 for (int j = 0; j < 5; ++j) { //F
2625 auto scalep = gamma.pointer<float>();
2626 auto shiftp = beta.pointer<float>();
2627 float scalef = scalep[j];
2628 float shiftf = shiftp[j];
2630 for (int k = 0; k < 2; ++k) { //B
2631 for (int l = 0; l < 2; ++l) { //Y
// bfyx linear offset with x==0: ((b*F + f)*Y + y), F == 5.
2632 int index = 10 * k + 2 * j + l;
2633 float data = output_ptr[index];
// Undo gamma/beta before comparing to the reference normalization.
2634 data = (data - shiftf) / scalef;
2635 EXPECT_NEAR(data, expected_result[index], 1e-5F);
2642 EXPECT_NEAR(sum, 0, 1e-5F);
2646 TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b1c2h2w2_no_bn_output)
2650 tensor input_shape = { 1, 2, 2, 2 };
2651 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
2652 tensor mean_shape = { feature(2) };
2653 auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
2654 tensor var_shape = { feature(2) };
2655 auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
2656 tensor gamma_shape = { feature(2) };
2657 auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
2658 tensor beta_shape = { feature(2) };
2659 auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
2664 topology.add(input_layout("input", input.get_layout()));
2665 topology.add(data("gamma", gamma));
2666 topology.add(data("beta", beta));
2667 topology.add(mutable_data("mean", mean));
2668 topology.add(mutable_data("variance", variance));
2669 topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
2671 set_values<float>(input, {
2683 set_values<float>(gamma, { 1.f, 1.f });
2684 set_values<float>(beta, { 0.f, 0.f });
2686 std::vector<float> expected_mean = { 0.602912f, 0.599727f };
2687 std::vector<float> expected_variance = { 0.00472505f, 0.0361782f };
2690 bo.set_option(build_option::outputs({ "mean", "variance" }));
2691 network network(engine, topology, bo);
2693 network.set_input_data("input", input);
2695 auto outputs = network.execute();
2697 for (int j = 0; j < 2; ++j) { //F
2698 auto meanp = mean.pointer<float>();
2699 auto varp = variance.pointer<float>();
2700 float meanf = meanp[j];
2701 float varf = varp[j];
2703 EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
2704 EXPECT_NEAR(varf, expected_variance[j], 1e-5F);