2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
19 #include <gtest/gtest.h>
20 #include "api/CPP/memory.hpp"
21 #include <api/CPP/input_layout.hpp>
22 #include "api/CPP/convolution_grad_weights.hpp"
23 #include "api/CPP/convolution.hpp"
24 #include "api/CPP/convolution_grad_input.hpp"
25 #include "api/CPP/reorder.hpp"
26 #include <api/CPP/mutable_data.hpp>
27 #include <api/CPP/data.hpp>
28 #include <api/CPP/topology.hpp>
29 #include <api/CPP/network.hpp>
30 #include <api/CPP/engine.hpp>
31 #include "test_utils/test_utils.h"
33 using namespace cldnn;
34 using namespace tests;
36 void validate_output(std::vector<float> expected_weights_vec, std::map<primitive_id, network_output> outputs)
38 EXPECT_EQ(outputs.size(), size_t(1));
39 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
41 auto output_prim = outputs.begin()->second.get_memory();
42 auto output_ptr = output_prim.pointer<float>();
44 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
46 float x = float_round(expected_weights_vec[i]);
47 float y = float_round(output_ptr[i]);
48 EXPECT_FLOAT_EQ(x, y) << "on weights verification" << random_seed << std::endl;
// Runs convolution_grad_weights on a single-batch f32 bfyx input_grad, then reads back the
// "weights" and "biases" buffers and expects every element to equal -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration, the expected-value initializers and a primitive
// defining the "input" id are not visible in this excerpt.
52 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
54 // Input grad : 1x2x2x2
66 const auto& engine = get_test_engine();
68 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
69 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
70 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 3 } });
71 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): weights/biases are never filled via set_values in the visible lines; the checks
// below presumably rely on them starting at zero — confirm.
73 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
74 set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f });
77 input_layout("input_grad", input_grad.get_layout()),
79 mutable_data("weights", weights),
80 mutable_data("biases", biases),
// The trailing { 1, 1, 2, 2 } and { 0, 0, -1, -1 } are presumably stride and input offset
// (cf. "stride2_pad1" in the test name) — confirm against convolution_grad_weights.hpp.
81 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
84 network network(engine, topology);
85 network.set_input_data("input_grad", input_grad);
87 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
88 EXPECT_EQ(outputs.size(), size_t(1));
89 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
91 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
93 auto output_ptr = output_prim.pointer<float>();
94 auto weights_ptr = weights.pointer<float>();
95 auto biases_ptr = biases.pointer<float>();
97 std::vector<float> expected_weights_vec = {
107 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
111 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
113 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
114 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same sign convention for the biases buffer.
117 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
119 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
120 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// Batch-of-two variant: convolution_grad_weights on f32 bfyx buffers, followed by a check that
// the "weights" and "biases" buffers equal -(expected * lr).
// NOTE(review): the test name says "in8x1x2x2" but the input allocated below is { 2, 1, 2, 2 }.
// NOTE(review): `lr`, the `topology` declaration and the expected-value initializers are not
// visible in this excerpt.
124 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in8x1x2x2_bfyx_stride2_pad1) {
126 // Input grad : 2x2x2x2
142 const auto& engine = get_test_engine();
144 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
145 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
146 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
147 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): weights/biases are never filled via set_values in the visible lines; the checks
// below presumably rely on them starting at zero — confirm.
149 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 8.f, 0.5f, 4.f, 7.f });
150 set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 1.f, 1.7f, 1.8f });
153 input_layout("input_grad", input_grad.get_layout()),
154 data("input", input),
155 mutable_data("weights", weights),
156 mutable_data("biases", biases),
// The trailing { 1, 1, 2, 2 } and { 0, 0, -1, -1 } are presumably stride and input offset
// (cf. "stride2_pad1" in the test name) — confirm against convolution_grad_weights.hpp.
157 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
160 network network(engine, topology);
161 network.set_input_data("input_grad", input_grad);
163 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
164 EXPECT_EQ(outputs.size(), size_t(1));
165 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
167 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
169 auto output_ptr = output_prim.pointer<float>();
170 auto weights_ptr = weights.pointer<float>();
171 auto biases_ptr = biases.pointer<float>();
173 std::vector<float> expected_weights_vec = {
183 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
187 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
189 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
190 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same sign convention for the biases buffer.
193 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
195 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
196 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// Bias-free variant: convolution_grad_weights is built without a bias list, and only the
// "weights" buffer is checked against -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration and the expected-value initializer are not
// visible in this excerpt.
200 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_no_bias) {
202 // Input grad : 1x2x2x2
214 const auto& engine = get_test_engine();
216 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
217 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
218 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
// NOTE(review): weights is never filled via set_values in the visible lines; the check below
// presumably relies on it starting at zero — confirm.
220 set_values(input, { 8.f, 0.5f, 6.f, 9.f });
221 set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f });
224 input_layout("input_grad", input_grad.get_layout()),
225 data("input", input),
226 mutable_data("weights", weights),
// No bias id list is passed here; { 1, 1, 2, 2 } and { 0, 0, -1, -1 } are presumably stride and
// input offset — confirm against convolution_grad_weights.hpp.
227 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
230 network network(engine, topology);
231 network.set_input_data("input_grad", input_grad);
233 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
234 EXPECT_EQ(outputs.size(), size_t(1));
235 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
237 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
239 auto output_ptr = output_prim.pointer<float>();
240 auto weights_ptr = weights.pointer<float>();
242 std::vector<float> expected_weights_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
252 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
254 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
255 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Forward + backward in one network: "conv" consumes the reordered input, and its result feeds
// both convolution_grad_input and convolution_grad_weights. Both gradient primitives are
// requested as outputs and the learning rate is set on the network before execution.
// NOTE(review): `lr`, `opt`, the `topology` declaration and the expected bias initializer are
// not visible in this excerpt.
259 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_fwd_backw) {
// NOTE(review): no input_grad buffer is allocated in this test; the shape comment below looks
// carried over from another test — confirm.
261 // Input grad : 1x2x2x2
276 const auto& engine = get_test_engine();
278 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
280 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
281 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// Unlike a pure backward test, weights and biases get explicit starting values here.
283 set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f });
284 set_values(weights, { 2.0f, 1.0f, 1.0f, 1.0f });
285 set_values(biases, { 0.f } );
288 input_layout("input", input.get_layout()),
289 reorder("input_reordered", "input", input.get_layout()),
290 mutable_data("weights", weights),
291 mutable_data("biases", biases),
292 convolution("conv", "input_reordered", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }),
293 convolution_grad_input("conv_grad_input", "conv", { "weights" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }),
// The final "conv_grad_input" argument names the grad-input primitive; presumably it orders the
// weight update after grad-input has read the old weights — confirm against the primitive docs.
294 convolution_grad_weights("conv_grad_weights", "conv", "input_reordered", { "weights" }, { "biases" }, { 1, 1, 1, 1 },
295 { 0, 0, -1, -1 }, { 1,1,1,1 }, "conv_grad_input")
298 opt.set_option(build_option::outputs({ "conv_grad_input", "conv_grad_weights" }));
299 network network(engine, topology, opt);
300 network.set_input_data("input", input);
301 network.set_learning_rate(lr);
303 auto outputs = network.execute();
305 auto output_prim = outputs.at("conv_grad_input").get_memory();
307 auto output_ptr = output_prim.pointer<float>();
308 auto weights_ptr = weights.pointer<float>();
309 auto biases_ptr = biases.pointer<float>();
311 std::vector<float> expected_output_vec = {
312 30.0f, 32.0f, 38.0f, 45.0f
// Expected weights are the starting values { 2, 1, 1, 1 } minus lr times { 89, 75, 72, 63 }.
315 std::vector<float> expected_weights_vec = {
316 2 - 89 * lr, 1 - 75 * lr, 1 - 72 * lr, 1 - 63 * lr
319 std::vector<float> expected_bias_vec = {
// One loop checks both vectors; expected_output_vec and expected_weights_vec each list four values.
323 for (unsigned int i = 0; i < expected_output_vec.size(); i++)
325 float x = float_round(expected_output_vec[i]), y = float_round(output_ptr[i]);
326 EXPECT_FLOAT_EQ(x, y) << "on output verification" << random_seed << std::endl;
327 x = float_round(expected_weights_vec[i]);
328 y = float_round(weights_ptr[i]);
329 EXPECT_FLOAT_EQ(x, y) << "on weights verification" << random_seed << std::endl;
// Only the first bias element is compared.
332 float x = float_round(expected_bias_vec[0]), y = float_round(biases_ptr[0]);
333 EXPECT_FLOAT_EQ(x, y) << "on biases verification" << random_seed << std::endl;
// Two-way split variant: convolution_grad_weights receives two weight ids and two bias ids, and
// each of the four buffers is checked against -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration and all expected-value initializers are not
// visible in this excerpt.
336 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_split2) {
338 // Input grad : 1x2x2x2
350 const auto& engine = get_test_engine();
352 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
353 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
354 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
355 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
356 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
357 auto biases2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// NOTE(review): none of the weight/bias buffers is filled via set_values in the visible lines;
// the checks below presumably rely on them starting at zero — confirm.
359 set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.0f, 2.0f, 3.0f, 4.0f });
360 set_values(input_grad, { 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.7f, 1.8f });
363 input_layout("input_grad", input_grad.get_layout()),
364 data("input", input),
365 mutable_data("weights", weights),
366 mutable_data("biases", biases),
367 mutable_data("weights2", weights2),
368 mutable_data("biases2", biases2),
// Two entries in each id list; { 1, 1, 2, 2 } and { 0, 0, -1, -1 } are presumably stride and
// input offset — confirm against convolution_grad_weights.hpp.
369 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights", "weights2" }, { "biases", "biases2" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
372 network network(engine, topology);
373 network.set_input_data("input_grad", input_grad);
375 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
376 EXPECT_EQ(outputs.size(), size_t(1));
377 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
379 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
381 auto output_ptr = output_prim.pointer<float>();
382 auto weights_ptr = weights.pointer<float>();
383 auto biases_ptr = biases.pointer<float>();
384 auto weights2_ptr = weights2.pointer<float>();
385 auto biases2_ptr = biases2.pointer<float>();
387 std::vector<float> expected_weights_vec = {
393 std::vector<float> expected_bias_vec = {
397 std::vector<float> expected_weights2_vec = {
403 std::vector<float> expected_bias2_vec = {
// Both weight buffers are checked in one loop bounded by expected_weights_vec.size(), so
// expected_weights2_vec must be at least that long.
407 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
409 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
410 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
411 x = float_round(expected_weights2_vec[i] * lr);
412 y = float_round(weights2_ptr[i]);
413 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same pattern for the two bias buffers, bounded by expected_bias_vec.size().
416 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
418 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
419 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
420 x = float_round(expected_bias2_vec[i] * lr);
421 y = float_round(biases2_ptr[i]);
422 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// convolution_grad_weights with { 2, 1, 1, 1 } weights on a { 1, 2, 5, 5 } input_grad and a
// { 1, 1, 4, 4 } input; "weights" and "biases" are then checked against -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration, the opening of the set_values(input, ...) call
// and the expected-value initializers are not visible in this excerpt.
426 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz1x1_in1x2x5x5_bfyx_stride2_pad1) {
428 // Input grad : 1x2x5x5
// The table below lists the two 5x5 planes side by side; the values match the set_values call
// for input_grad further down (left five columns first, then the right five).
433 // 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9
434 // 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9
435 // 0.6 0.5 0.4 0.2 0.1 1.5 0.6 0.7 0.3 0.8
436 // 0.7 0.8 0.9 0.2 0.4 1.8 0.4 0.9 0.4 0.7
437 // 0.6 0.5 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6
445 const auto& engine = get_test_engine();
447 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 5, 5 } });
448 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
449 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 1, 1 } });
450 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): the next line is the tail of an initializer list whose opening is not visible;
// presumably the last row of the values for `input` — confirm.
456 9.f, 10.f, 11.f, 11.f
458 set_values(input_grad, {
459 0.5f, 0.6f, 0.7f, 0.9f, 1.f,
460 0.7f, 0.8f, 0.8f, 1.7f, 1.8f,
461 0.6f, 0.5f, 0.4f, 0.2f, 0.1f,
462 0.7f, 0.8f, 0.9f, 0.2f, 0.4f,
463 0.6f, 0.5f, 0.4f, 0.1f, 0.1f,
464 1.1f, 0.7f, 0.9f, 0.1f, 1.9f,
465 1.2f, 2.1f, 0.5f, 0.2f, 0.9f,
466 1.5f, 0.6f, 0.7f, 0.3f, 0.8f,
467 1.8f, 0.4f, 0.9f, 0.4f, 0.7f,
468 1.7f, 0.5f, 0.4f, 0.5f, 0.6f
472 input_layout("input_grad", input_grad.get_layout()),
473 data("input", input),
474 mutable_data("weights", weights),
475 mutable_data("biases", biases),
// { 1, 1, 2, 2 } and { 0, 0, -1, -1 } are presumably stride and input offset
// (cf. "stride2_pad1" in the test name) — confirm against convolution_grad_weights.hpp.
476 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
479 network network(engine, topology);
480 network.set_input_data("input_grad", input_grad);
482 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
483 EXPECT_EQ(outputs.size(), size_t(1));
484 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
486 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
488 auto output_ptr = output_prim.pointer<float>();
489 auto weights_ptr = weights.pointer<float>();
490 auto biases_ptr = biases.pointer<float>();
492 std::vector<float> expected_weights_vec = {
496 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
500 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
502 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
503 EXPECT_FLOAT_EQ(x, -y) << "on weights verification " << random_seed << std::endl;
// Same sign convention for the biases buffer.
506 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
508 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
509 EXPECT_FLOAT_EQ(x, -y) << "on biases verification " << random_seed << std::endl;
// yxfb variant with 32 batches: input_grad and input are allocated in format::yxfb while the
// weights and biases stay in bfyx. After execution "weights" and "biases" are checked against
// -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration, the opening of the set_values(input, ...) call
// and the expected bias initializer are not visible in this excerpt.
513 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in32x1x2x2_yxfb_stride1) {
515 // Input grad : 32x1x2x2
// Each commented row below lists the 32 values of one (y, x) position; the first four rows
// match the input_grad values and the remaining nine match the 3x3 input values set further down.
520 // y0: x0: 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9 0.6 0.5 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.5 0.6
521 // y0: x1: 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9 0.1 1.5 0.6 2.1 0.4 0.3
522 // y1: x0: 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 1.2 2.1 0.5 0.2 0.9 0.4 0.1 1.2 0.2 0.1
523 // y1: x1: 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.9 0.1 1.9 0.1 1.7 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 0.1 1.7 0.5 0.4 1.7 0.5 0.4 0.5 0.6 0.0 0.7
526 // y0: x0: 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 0.6 1.2 2.1 0.1 0.2
527 // y0: x1: 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.4 0.1 0.1 1.7 0.5 0.4 0.5 1 1.1 0.7 0.9 0.1 1.9 0.1 1.7 0.5 0.4 0.4 0.1 0.1 0.3 0.4
528 // y0: x2: 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.7 0.5 0.4 0.5 1.1 0.7 0.9 0.1 0.1 1.7 0.5 0.1 1.9 0.6 0.5 0.4 0.1 0.1 1.7 0.5 0.4 2.1 0.5 0.5 0.6
529 // y1: x0: 1.9 0.1 1.7 0.5 0.6 0.7 0.9 1 1.1 0.7 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.4 0.2 0.9 0.4 0.1 1.2 1.9 0.1 1.5 0.6 2.1 2.3 0.7 0.8
530 // y1: x1: 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 0.6 1.2 2.1 0.9 1.0
531 // y1: x2: 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 0.4 0.1 0.1 1.7 0.5 0.4 0.5 1 1.1 0.7 0.9 0.1 1.9 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.1 1.2
532 // y2: x0: 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.9 0.1 1.9 0.1 1.7 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 0.1 1.7 0.5 0.4 1.7 0.5 0.4 0.5 0.6 1.3 1.4
533 // y2: x1: 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9 0.6 0.5 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2 0.9 1.5 1.6
534 // y2: x2: 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1 1.9 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 1.2 2.1 0.5 0.2 0.9 0.4 0.1 1.2 1.7 1.8
536 const auto& engine = get_test_engine();
538 auto input_grad = memory::allocate(engine, { data_types::f32, format::yxfb,{ 32, 1, 2, 2 } });
539 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 32, 1, 3, 3 } });
540 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
541 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
// NOTE(review): the nine rows below are the body of an initializer list whose opening is not
// visible; presumably the values for `input` — confirm.
544 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 0.6f, 1.2f, 2.1f, 0.1f, 0.2f,
545 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 0.3f, 0.4f,
546 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 1.1f, 0.7f, 0.9f, 0.1f, 0.1f, 1.7f, 0.5f, 0.1f, 1.9f, 0.6f, 0.5f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 2.1f, 0.5f, 0.5f, 0.6f,
547 1.9f, 0.1f, 1.7f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.2f, 0.9f, 0.4f, 0.1f, 1.2f, 1.9f, 0.1f, 1.5f, 0.6f, 2.1f, 2.3f, 0.7f, 0.8f,
548 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 0.6f, 1.2f, 2.1f, 0.9f, 1.0f,
549 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.1f, 1.2f,
550 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 1.3f, 1.4f,
551 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.6f, 0.5f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 1.5f, 1.6f,
552 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.1f, 1.2f, 1.7f, 1.8f
554 set_values(input_grad, {
555 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.6f, 0.5f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.5f, 0.6f,
556 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.1f, 1.5f, 0.6f, 2.1f, 0.4f, 0.3f,
557 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 1.2f, 2.1f, 0.5f, 0.2f, 0.9f, 0.4f, 0.1f, 1.2f, 0.2f, 0.1f,
558 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.9f, 0.1f, 1.9f, 0.1f, 1.7f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.0f, 0.7f
562 input_layout("input_grad", input_grad.get_layout()),
563 data("input", input),
564 mutable_data("weights", weights),
565 mutable_data("biases", biases),
// { 1, 1, 1, 1 } and { 0, 0, 0, 0 } are presumably stride and input offset
// (cf. "stride1" and no pad in the test name) — confirm against convolution_grad_weights.hpp.
566 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
569 network network(engine, topology);
570 network.set_input_data("input_grad", input_grad);
572 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
573 EXPECT_EQ(outputs.size(), size_t(1));
574 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
// NOTE(review): output_prim is not read in the visible part of this test.
576 auto output_prim = outputs.begin()->second.get_memory();
578 auto weights_ptr = weights.pointer<float>();
579 auto biases_ptr = biases.pointer<float>();
581 std::vector<float> expected_weights_vec = {
582 90.58f, 85.92f, 97.22f, 91.86f
585 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
589 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
591 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
592 EXPECT_FLOAT_EQ(x, -y) << "on weights verification " << random_seed << std::endl;
// Same sign convention for the biases buffer.
595 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
597 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
598 EXPECT_FLOAT_EQ(x, -y) << "on biases verification " << random_seed << std::endl;
// Batch-of-two test with { 2, 1, 3, 3 } weights: runs convolution_grad_weights and checks the
// "weights" and "biases" buffers against -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration, the opening of the set_values(input, ...) call
// and most of the expected-value initializers are not visible in this excerpt.
602 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_pad1) {
604 // Input grad : 2x2x3x3
// The first four commented rows match the input_grad values set below, the last two match the
// values listed for the input.
609 // 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7
610 // 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2
611 // 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.7 0.5
612 // 1.9 0.1 1.7 0.5 0.6 0.7 0.9 1 1.1
615 // 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1
616 // 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2
618 const auto& engine = get_test_engine();
620 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
621 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
622 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
623 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): the two rows below are the body of an initializer list whose opening is not
// visible; presumably the values for `input` — confirm.
626 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f,
627 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f
629 set_values(input_grad, {
630 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f,
631 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f,
632 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f,
633 1.9f, 0.1f, 1.7f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f
637 input_layout("input_grad", input_grad.get_layout()),
638 data("input", input),
639 mutable_data("weights", weights),
640 mutable_data("biases", biases),
// { 1, 1, 1, 1 } and { 0, 0, -1, -1 } are presumably stride and input offset
// (cf. "stride1_pad1" in the test name) — confirm against convolution_grad_weights.hpp.
641 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
644 network network(engine, topology);
645 network.set_input_data("input_grad", input_grad);
647 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
648 EXPECT_EQ(outputs.size(), size_t(1));
649 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
651 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
653 auto output_ptr = output_prim.pointer<float>();
654 auto weights_ptr = weights.pointer<float>();
655 auto biases_ptr = biases.pointer<float>();
// Only two rows of the expected weights are visible in this excerpt.
657 std::vector<float> expected_weights_vec = {
662 6.93f, 11.42f, 8.63f,
663 10.59f, 16.13f, 10.47f,
667 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
671 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
673 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
674 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same sign convention for the biases buffer.
677 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
679 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
680 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// Momentum variant: in addition to weights and biases, convolution_grad_weights is given
// { "prev_weights" } and { "prev_biases" } buffers. Only "weights" and "biases" are verified,
// each against -(expected * lr).
// NOTE(review): `lr`, the `topology` declaration, the opening of the set_values(input, ...) call
// and most of the expected-value initializers are not visible in this excerpt.
684 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_pad1_momentum) {
686 // Input grad : 2x2x3x3
// The first four commented rows match the input_grad values set below, the last two match the
// values listed for the input.
691 // 0.4 0.1 0.1 1.7 0.5 0.4 0.5 0.6 0.7
692 // 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2
693 // 0.1 1.7 0.5 0.4 0.4 0.1 0.1 1.7 0.5
694 // 1.9 0.1 1.7 0.5 0.6 0.7 0.9 1 1.1
697 // 0.5 0.6 0.7 0.9 1 1.1 0.7 0.9 0.1
698 // 0.7 0.8 0.8 1.7 1.8 1.2 2.1 0.5 0.2
700 const auto& engine = get_test_engine();
702 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
703 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
704 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
705 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): prev_weights/prev_biases are never filled via set_values in the visible lines,
// and their contents are not verified after execution — confirm this is intended.
706 auto prev_weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3} });
707 auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1} });
// NOTE(review): the two rows below are the body of an initializer list whose opening is not
// visible; presumably the values for `input` — confirm.
710 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f, 0.7f, 0.9f, 0.1f,
711 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f
713 set_values(input_grad, {
714 0.4f, 0.1f, 0.1f, 1.7f, 0.5f, 0.4f, 0.5f, 0.6f, 0.7f,
715 0.7f, 0.8f, 0.8f, 1.7f, 1.8f, 1.2f, 2.1f, 0.5f, 0.2f,
716 0.1f, 1.7f, 0.5f, 0.4f, 0.4f, 0.1f, 0.1f, 1.7f, 0.5f,
717 1.9f, 0.1f, 1.7f, 0.5f, 0.6f, 0.7f, 0.9f, 1.f, 1.1f
721 input_layout("input_grad", input_grad.get_layout()),
722 data("input", input),
723 mutable_data("weights", weights),
724 mutable_data("biases", biases),
725 mutable_data("prev_weights", prev_weights),
726 mutable_data("prev_biases", prev_biases),
// Four id lists here (weights, biases, prev_weights, prev_biases); { 1, 1, 1, 1 } and
// { 0, 0, -1, -1 } are presumably stride and input offset — confirm against the primitive header.
727 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { "prev_weights" }, { "prev_biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
730 network network(engine, topology);
731 network.set_input_data("input_grad", input_grad);
733 auto outputs = network.execute();
// NOTE(review): EXPECT_EQ is non-fatal, so outputs.begin() is still dereferenced if the map is empty.
734 EXPECT_EQ(outputs.size(), size_t(1));
735 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
737 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
739 auto output_ptr = output_prim.pointer<float>();
740 auto weights_ptr = weights.pointer<float>();
741 auto biases_ptr = biases.pointer<float>();
// Only two rows of the expected weights are visible in this excerpt.
743 std::vector<float> expected_weights_vec = {
748 6.93f, 11.42f, 8.63f,
749 10.59f, 16.13f, 10.47f,
753 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
757 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
759 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
760 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same sign convention for the biases buffer.
763 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
765 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
766 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// 7x7 case: input_grad { 2, 2, 7, 7 }, input { 2, 1, 7, 7 }, weights { 2, 1, 7, 7 }. Runs
// convolution_grad_weights and checks "weights" and "biases" against -(expected * lr).
// Unlike the sibling tests, the output count is checked with a fatal ASSERT_EQ.
// NOTE(review): `lr`, the `topology` declaration, the opening of the set_values(input, ...) call
// and the expected bias initializer are not visible in this excerpt.
770 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_pad3) {
772 // Input grad : 2x2x7x7
// Commented tables: per-batch 7x7 planes for f0 (left) and f1 (right). The first two tables
// match the input_grad values set below; the third matches the values listed for the input.
777 // b0:f0: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 b0:f1: 0.1 0.2 0.3 0.4 0.5 0.6 0.7
778 // b0:f0: 0.7 0.6 0.5 0.4 0.3 0.2 0.1 b0:f1: 0.1 0.2 0.3 0.5 0.7 1.1 1.3
779 // b0:f0: 0.1 0.7 0.2 0.6 0.3 0.5 0.4 b0:f1: 0.7 0.6 0.5 0.4 0.3 0.2 0.1
780 // b0:f0: 0.3 0.4 0.5 0.6 0.7 0.8 0.9 b0:f1: 0.1 0.7 0.2 0.6 0.3 0.5 0.4
781 // b0:f0: 0.9 0.8 0.7 0.6 0.5 0.4 0.3 b0:f1: 0.3 0.4 0.5 0.6 0.7 0.8 0.9
782 // b0:f0: 0.3 0.9 0.4 0.8 0.5 0.7 0.6 b0:f1: 0.9 0.8 0.7 0.6 0.5 0.4 0.3
783 // b0:f0: 0.1 0.2 0.3 0.5 0.7 1.1 1.3 b0:f1: 0.3 0.9 0.4 0.8 0.5 0.7 0.6
785 // b1:f0: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 b1:f1: 0.1 0.2 0.3 0.4 0.5 0.6 0.7
786 // b1:f0: 0.7 0.6 0.5 0.4 0.3 0.2 0.1 b1:f1: 0.3 0.4 0.5 0.6 0.7 0.8 0.9
787 // b1:f0: 0.1 0.7 0.2 0.6 0.3 0.5 0.4 b1:f1: 0.7 0.6 0.5 0.4 0.3 0.2 0.1
788 // b1:f0: 0.3 0.9 0.4 0.8 0.5 0.7 0.6 b1:f1: 0.1 0.7 0.2 0.6 0.3 0.5 0.4
789 // b1:f0: 0.3 0.4 0.5 0.6 0.7 0.8 0.9 b1:f1: 0.9 0.8 0.7 0.6 0.5 0.4 0.3
790 // b1:f0: 0.9 0.8 0.7 0.6 0.5 0.4 0.3 b1:f1: 0.3 0.9 0.4 0.8 0.5 0.7 0.6
791 // b1:f0: 0.1 0.2 0.3 0.5 0.7 1.1 1.3 b1:f1: 0.1 0.2 0.3 0.5 0.7 1.1 1.3
// NOTE(review): the right-hand column below is labelled "b0:f1" but the input has a single
// feature and two batches, so it presumably describes b1:f0 — confirm.
794 // b0:f0: 0.5 0.6 0.7 0.9 0.2 0.1 0.7 b0:f1: 0.5 0.6 0.4 0.9 0.2 0.1 0.5
795 // b0:f0: 0.7 0.8 0.2 0.1 0.7 0.8 0.8 b0:f1: 0.9 0.3 0.7 0.5 0.6 0.7 0.9
796 // b0:f0: 0.5 0.1 0.7 0.9 0.6 0.1 0.7 b0:f1: 0.7 0.8 0.8 0.7 0.8 0.2 0.1
797 // b0:f0: 0.7 0.2 0.8 0.7 0.8 0.2 0.1 b0:f1: 0.5 0.6 0.7 0.9 0.3 0.1 0.7
798 // b0:f0: 0.1 0.7 0.5 0.6 0.7 0.9 0.1 b0:f1: 0.7 0.8 0.8 0.7 0.8 0.2 0.1
799 // b0:f0: 0.7 0.8 0.8 0.7 0.8 0.2 0.1 b0:f1: 0.4 0.6 0.1 0.2 0.1 0.1 0.7
800 // b0:f0: 0.5 0.6 0.7 0.9 0. 0.1 0.7 b0:f1: 0.5 0.3 0.7 0.5 0.4 0.1 0.7
802 const auto& engine = get_test_engine();
804 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
805 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
806 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
807 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): the fourteen rows below are the body of an initializer list whose opening is
// not visible; presumably the values for `input` — confirm.
810 0.5f, 0.6f, 0.7f, 0.9f, 0.2f, 0.1f, 0.7f,
811 0.7f, 0.8f, 0.2f, 0.1f, 0.7f, 0.8f, 0.8f,
812 0.5f, 0.1f, 0.7f, 0.9f, 0.6f, 0.1f, 0.7f,
813 0.7f, 0.2f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
814 0.1f, 0.7f, 0.5f, 0.6f, 0.7f, 0.9f, 0.1f,
815 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
816 0.5f, 0.6f, 0.7f, 0.9f, 0.f, 0.1f, 0.7f,
818 0.5f, 0.6f, 0.4f, 0.9f, 0.2f, 0.1f, 0.5f,
819 0.9f, 0.3f, 0.7f, 0.5f, 0.6f, 0.7f, 0.9f,
820 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
821 0.5f, 0.6f, 0.7f, 0.9f, 0.3f, 0.1f, 0.7f,
822 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
823 0.4f, 0.6f, 0.1f, 0.2f, 0.1f, 0.1f, 0.7f,
824 0.5f, 0.3f, 0.7f, 0.5f, 0.4f, 0.1f, 0.7f
// Four 7x7 planes follow, in the order b0:f0, b0:f1, b1:f0, b1:f1 of the commented tables.
826 set_values(input_grad, {
827 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
828 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
829 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
830 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
831 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
832 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
833 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
835 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
836 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
837 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
838 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
839 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
840 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
841 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
843 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
844 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
845 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
846 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
847 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
848 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
849 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
851 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
852 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
853 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
854 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
855 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
856 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
857 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f
861 input_layout("input_grad", input_grad.get_layout()),
862 data("input", input),
863 mutable_data("weights", weights),
864 mutable_data("biases", biases),
// { 1, 1, 1, 1 } and { 0, 0, -3, -3 } are presumably stride and input offset
// (cf. "stride1_pad3" in the test name) — confirm against convolution_grad_weights.hpp.
865 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -3, -3 })
868 network network(engine, topology);
869 network.set_input_data("input_grad", input_grad);
871 auto outputs = network.execute();
// Fatal assertion: the test stops here if the output count is wrong, before begin() is dereferenced.
872 ASSERT_EQ(outputs.size(), size_t(1));
873 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
875 auto output_prim = outputs.begin()->second.get_memory();
// NOTE(review): output_ptr is not read in the visible part of this test.
877 auto output_ptr = output_prim.pointer<float>();
878 auto weights_ptr = weights.pointer<float>();
879 auto biases_ptr = biases.pointer<float>();
// Two 7x7 blocks of expected values, one per weights slice (98 values in total).
881 std::vector<float> expected_weights_vec = {
882 14.02f, 15.52f, 15.92f, 17.84f, 14.41f, 11.16f, 8.43f,
883 15.63f, 18.22f, 20.7f, 20.47f, 16.75f, 13.52f, 10.06f,
884 16.14f, 19.15f, 21.4f, 23.61f, 19.83f, 15.77f, 12.25f,
885 20.18f, 21.93f, 22.73f, 24.75f, 20.79f, 15.54f, 12.24f,
886 14.02f, 16.77f, 20.24f, 22.51f, 19.33f, 15.67f, 11.58f,
887 11.96f, 14.57f, 15.26f, 16.94f, 13.79f, 11.1f, 8.14f,
888 9.38f, 10.3f, 11.09f, 12.31f, 10.68f, 7.95f, 6.34f,
890 11.67f, 13.25f, 14.95f, 16.62f, 13.74f, 11.6f, 8.86f,
891 12.85f, 15.77f, 18.18f, 19.95f, 16.98f, 14.05f, 11.02f,
892 16.69f, 18.76f, 20.57f, 22.6f, 19.66f, 15.07f, 12.14f,
893 18.11f, 20.92f, 23.91f, 27.39f, 22.55f, 17.55f, 12.81f,
894 15.32f, 18.54f, 20.06f, 21.88f, 18.13f, 13.96f, 10.35f,
895 13.34f, 15.14f, 15.16f, 16.43f, 13.54f, 10.41f, 7.75f,
896 10.28f, 11.8f, 12.68f, 12.49f, 9.91f, 7.05f, 4.94f
899 std::vector<float> expected_bias_vec = {
// The result is checked on the mutable weights buffer itself: weights[i] must equal -(expected[i] * lr).
903 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
905 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
906 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same sign convention for the biases buffer.
909 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
911 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
912 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// Momentum variant of the 7x7 convolution_grad_weights test: besides "weights" and
// "biases", the primitive is given "prev_weights" and "prev_biases" buffers. The network
// is executed once and the contents of `weights` / `biases` are compared against
// reference values scaled by `lr` and negated.
916 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_pad3_momentum) {
918 // Input grad : 2x2x7x7
923 // b0:f0: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 b0:f1: 0.1 0.2 0.3 0.4 0.5 0.6 0.7
924 // b0:f0: 0.7 0.6 0.5 0.4 0.3 0.2 0.1 b0:f1: 0.1 0.2 0.3 0.5 0.7 1.1 1.3
925 // b0:f0: 0.1 0.7 0.2 0.6 0.3 0.5 0.4 b0:f1: 0.7 0.6 0.5 0.4 0.3 0.2 0.1
926 // b0:f0: 0.3 0.4 0.5 0.6 0.7 0.8 0.9 b0:f1: 0.1 0.7 0.2 0.6 0.3 0.5 0.4
927 // b0:f0: 0.9 0.8 0.7 0.6 0.5 0.4 0.3 b0:f1: 0.3 0.4 0.5 0.6 0.7 0.8 0.9
928 // b0:f0: 0.3 0.9 0.4 0.8 0.5 0.7 0.6 b0:f1: 0.9 0.8 0.7 0.6 0.5 0.4 0.3
929 // b0:f0: 0.1 0.2 0.3 0.5 0.7 1.1 1.3 b0:f1: 0.3 0.9 0.4 0.8 0.5 0.7 0.6
931 // b1:f0: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 b1:f1: 0.1 0.2 0.3 0.4 0.5 0.6 0.7
932 // b1:f0: 0.7 0.6 0.5 0.4 0.3 0.2 0.1 b1:f1: 0.3 0.4 0.5 0.6 0.7 0.8 0.9
933 // b1:f0: 0.1 0.7 0.2 0.6 0.3 0.5 0.4 b1:f1: 0.7 0.6 0.5 0.4 0.3 0.2 0.1
934 // b1:f0: 0.3 0.9 0.4 0.8 0.5 0.7 0.6 b1:f1: 0.1 0.7 0.2 0.6 0.3 0.5 0.4
935 // b1:f0: 0.3 0.4 0.5 0.6 0.7 0.8 0.9 b1:f1: 0.9 0.8 0.7 0.6 0.5 0.4 0.3
936 // b1:f0: 0.9 0.8 0.7 0.6 0.5 0.4 0.3 b1:f1: 0.3 0.9 0.4 0.8 0.5 0.7 0.6
937 // b1:f0: 0.1 0.2 0.3 0.5 0.7 1.1 1.3 b1:f1: 0.1 0.2 0.3 0.5 0.7 1.1 1.3
// NOTE(review): the table below matches the first 98-value data list further down
// (presumably the contents of `input`). `input` is allocated as { 2, 1, 7, 7 }, so the
// right-hand column labelled "b0:f1" looks like it should read "b1:f0" - confirm.
940 // b0:f0: 0.5 0.6 0.7 0.9 0.2 0.1 0.7 b0:f1: 0.5 0.6 0.4 0.9 0.2 0.1 0.5
941 // b0:f0: 0.7 0.8 0.2 0.1 0.7 0.8 0.8 b0:f1: 0.9 0.3 0.7 0.5 0.6 0.7 0.9
942 // b0:f0: 0.5 0.1 0.7 0.9 0.6 0.1 0.7 b0:f1: 0.7 0.8 0.8 0.7 0.8 0.2 0.1
943 // b0:f0: 0.7 0.2 0.8 0.7 0.8 0.2 0.1 b0:f1: 0.5 0.6 0.7 0.9 0.3 0.1 0.7
944 // b0:f0: 0.1 0.7 0.5 0.6 0.7 0.9 0.1 b0:f1: 0.7 0.8 0.8 0.7 0.8 0.2 0.1
945 // b0:f0: 0.7 0.8 0.8 0.7 0.8 0.2 0.1 b0:f1: 0.4 0.6 0.1 0.2 0.1 0.1 0.7
946 // b0:f0: 0.5 0.6 0.7 0.9 0. 0.1 0.7 b0:f1: 0.5 0.3 0.7 0.5 0.4 0.1 0.7
948 const auto& engine = get_test_engine();
// All buffers are f32 / bfyx. `weights` and `prev_weights` share one shape, as do
// `biases` and `prev_biases`.
950 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
951 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
952 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
953 auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
// NOTE(review): no explicit initialization of prev_weights / prev_biases appears in this
// test; presumably they rely on the allocator's initial contents - confirm.
954 auto prev_weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
955 auto prev_biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
958 0.5f, 0.6f, 0.7f, 0.9f, 0.2f, 0.1f, 0.7f,
959 0.7f, 0.8f, 0.2f, 0.1f, 0.7f, 0.8f, 0.8f,
960 0.5f, 0.1f, 0.7f, 0.9f, 0.6f, 0.1f, 0.7f,
961 0.7f, 0.2f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
962 0.1f, 0.7f, 0.5f, 0.6f, 0.7f, 0.9f, 0.1f,
963 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
964 0.5f, 0.6f, 0.7f, 0.9f, 0.f, 0.1f, 0.7f,
966 0.5f, 0.6f, 0.4f, 0.9f, 0.2f, 0.1f, 0.5f,
967 0.9f, 0.3f, 0.7f, 0.5f, 0.6f, 0.7f, 0.9f,
968 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
969 0.5f, 0.6f, 0.7f, 0.9f, 0.3f, 0.1f, 0.7f,
970 0.7f, 0.8f, 0.8f, 0.7f, 0.8f, 0.2f, 0.1f,
971 0.4f, 0.6f, 0.1f, 0.2f, 0.1f, 0.1f, 0.7f,
972 0.5f, 0.3f, 0.7f, 0.5f, 0.4f, 0.1f, 0.7f
// Gradient values, listed in the same order as the "Input grad" comment table above
// (b0:f0, b0:f1, b1:f0, b1:f1; seven rows of seven values each).
974 set_values(input_grad, {
975 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
976 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
977 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
978 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
979 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
980 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
981 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
983 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
984 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
985 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
986 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
987 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
988 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
989 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
991 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
992 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
993 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
994 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
995 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
996 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
997 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f,
999 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
1000 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f,
1001 0.7f, 0.6f, 0.5f, 0.4f, 0.3f, 0.2f, 0.1f,
1002 0.1f, 0.7f, 0.2f, 0.6f, 0.3f, 0.5f, 0.4f,
1003 0.9f, 0.8f, 0.7f, 0.6f, 0.5f, 0.4f, 0.3f,
1004 0.3f, 0.9f, 0.4f, 0.8f, 0.5f, 0.7f, 0.6f,
1005 0.1f, 0.2f, 0.3f, 0.5f, 0.7f, 1.1f, 1.3f
// "input_grad" is the only network input (set below via set_input_data); "input" is
// constant data, while the weight, bias and prev_* buffers are registered as mutable_data.
1009 input_layout("input_grad", input_grad.get_layout()),
1010 data("input", input),
1011 mutable_data("weights", weights),
1012 mutable_data("biases", biases),
1013 mutable_data("prev_weights", prev_weights),
1014 mutable_data("prev_biases", prev_biases),
// NOTE(review): per the test name (stride1_pad3), { 1, 1, 1, 1 } is presumably the stride
// and { 0, 0, -3, -3 } the input offset, i.e. a padding of 3 in x and y - confirm.
1015 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { "biases" }, { "prev_weights" }, { "prev_biases" }, { 1, 1, 1, 1 }, { 0, 0, -3, -3 })
1018 network network(engine, topology);
1019 network.set_input_data("input_grad", input_grad);
1021 auto outputs = network.execute();
// Fatal assertion: the following lines dereference outputs.begin().
1022 ASSERT_EQ(outputs.size(), size_t(1));
1023 EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
// NOTE(review): output_prim / output_ptr do not appear to be used by the checks below,
// which read the `weights` and `biases` buffers directly.
1025 auto output_prim = outputs.begin()->second.get_memory();
1027 auto output_ptr = output_prim.pointer<float>();
1028 auto weights_ptr = weights.pointer<float>();
1029 auto biases_ptr = biases.pointer<float>();
// Reference values before scaling by `lr`: 2 filters x 7 rows x 7 columns.
1031 std::vector<float> expected_weights_vec = {
1032 14.02f, 15.52f, 15.92f, 17.84f, 14.41f, 11.16f, 8.43f,
1033 15.63f, 18.22f, 20.7f, 20.47f, 16.75f, 13.52f, 10.06f,
1034 16.14f, 19.15f, 21.4f, 23.61f, 19.83f, 15.77f, 12.25f,
1035 20.18f, 21.93f, 22.73f, 24.75f, 20.79f, 15.54f, 12.24f,
1036 14.02f, 16.77f, 20.24f, 22.51f, 19.33f, 15.67f, 11.58f,
1037 11.96f, 14.57f, 15.26f, 16.94f, 13.79f, 11.1f, 8.14f,
1038 9.38f, 10.3f, 11.09f, 12.31f, 10.68f, 7.95f, 6.34f,
1040 11.67f, 13.25f, 14.95f, 16.62f, 13.74f, 11.6f, 8.86f,
1041 12.85f, 15.77f, 18.18f, 19.95f, 16.98f, 14.05f, 11.02f,
1042 16.69f, 18.76f, 20.57f, 22.6f, 19.66f, 15.07f, 12.14f,
1043 18.11f, 20.92f, 23.91f, 27.39f, 22.55f, 17.55f, 12.81f,
1044 15.32f, 18.54f, 20.06f, 21.88f, 18.13f, 13.96f, 10.35f,
1045 13.34f, 15.14f, 15.16f, 16.43f, 13.54f, 10.41f, 7.75f,
1046 10.28f, 11.8f, 12.68f, 12.49f, 9.91f, 7.05f, 4.94f
1049 std::vector<float> expected_bias_vec = {
// Each value read back from `weights` must equal the negated reference value scaled by
// `lr`; both sides go through float_round before the comparison.
// NOTE(review): `lr` is presumably the learning rate declared earlier in this test - confirm.
1053 for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
1055 float x = float_round(expected_weights_vec[i] * lr), y = float_round(weights_ptr[i]);
1056 EXPECT_FLOAT_EQ(x, -y) << "on weights verification" << random_seed << std::endl;
// Same comparison for the values read back from `biases`.
1059 for (unsigned int i = 0; i < expected_bias_vec.size(); i++)
1061 float x = float_round(expected_bias_vec[i] * lr), y = float_round(biases_ptr[i]);
1062 EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
// Executes one network twice with different "input_grad" contents and checks after each
// run, via validate_output, that the single "conv_grad_weights" output matches the
// expected values for that iteration.
1066 TEST(convolution_grad_weights_f32_fw_gpu, ngraph_2d_1item_2iterations) {
1068 // Input grad : 1x2x4x2
1072 const auto& engine = get_test_engine();
1073 auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 2 } });
1074 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 3 } });
1075 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
1079 input_layout("input_grad", input_grad.get_layout()),
1080 data("input", input),
1081 mutable_data("weights", weights),
// No bias ids are passed here.
// NOTE(review): the trailing arguments are presumably stride, input offset, dilation and
// a flag requesting the weight gradient itself as output - confirm against
// convolution_grad_weights.hpp.
1082 convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { 1,1,1,1 }, { 0,0,0,0 }, { 1,1,1,1 }, true)
// Build the network with the optimize_data build option enabled.
1086 bo.set_option(build_option::optimize_data(true));
1087 network network(engine, topology, bo);
1090 // set values for first iteration
// 15 values, one per element of the 1x1x5x3 `input` buffer
// (NOTE(review): presumably the argument list of the call that fills `input` - confirm).
1092 { 0.671875f, 0.546875f, -0.5625f, -0.359375f, -0.09375f, 0.546875f, -0.546875f, 0.890625f, 0.828125f, -0.546875f, 1.f, -0.078125f, -0.890625f, 0.40625f, -0.359375f });
// Only the first gradient element is 1; everything else is 0.
1093 set_values(input_grad,
1094 { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1095 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f });
1096 network.set_input_data("input_grad", input_grad);
// Expected: the first four values equal elements 0, 1, 5 and 6 of the 15-value list
// above; the remaining four values are zero.
1097 std::vector<float> expected_weights_vec =
1098 { 0.671875f, 0.546875f, 0.546875f, -0.546875f,
1099 0.f, 0.f, 0.f, 0.f };
1100 auto outputs = network.execute();
1101 validate_output(expected_weights_vec, outputs);
1103 // set values for second iteration
// Only the last gradient element is 1; everything else is 0.
1104 set_values(input_grad,
1105 { 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1106 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f });
1107 network.set_input_data("input_grad", input_grad);
// Expected: the first four values are zero; the last four equal elements 8, 9, 13 and 14
// of the 15-value list above.
1108 expected_weights_vec =
1109 { 0.f, 0.f, 0.f, 0.f,
1110 0.828125f, -0.546875f, 0.40625f, -0.359375f };
1111 outputs = network.execute();
1112 validate_output(expected_weights_vec, outputs);