2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/memory.hpp"
20 #include <api/input_layout.hpp>
21 #include <api/topology.hpp>
22 #include <api/network.hpp>
23 #include <api/engine.hpp>
24 #include "test_utils/test_utils.h"
25 #include <api/reorder.hpp>
26 #include <api/data.hpp>
27 #include <api/activation.hpp>
28 #include <api/mutable_data.hpp>
29 #include <api/layout.hpp>
30 #include <api/tile.hpp>
31 #include <api/reshape.hpp>
33 #include <api/batch_norm.hpp>
34 #include <api/concatenation.hpp>
36 using namespace cldnn;
37 using namespace tests;
40 These tests are intended to check if additional reorders are being added properly during
41 add_reorders optimization pass.
44 //Input has incompatible format
// Checks that the add_reorders pass inserts a reorder when a primitive's input
// format is not supported by its implementation: the input is allocated in
// fyxb, which batch_norm cannot consume directly (see the inline note below).
// NOTE(review): this chunk appears to have lines elided (the set_values call
// for `input`, the `expected_out` initializer values, the `topology` variable
// declaration, and closing braces) — verify against the full file.
45 TEST(add_reorders_gpu, basic1) {
46 const auto& engine = get_test_engine();
48 auto input = memory::allocate(engine, { data_types::f32, format::fyxb,{ 2, 2, 3, 2 } }); //format unsupported by batch_norm!
// Per-feature mean/variance buffers (2 features) used by batch_norm.
49 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
50 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
63 set_values(mean, { 0.1f, 0.2f });
64 set_values(variance, { 0.4f, 0.5f });
// epsilon doubles as both the batch_norm epsilon and the EXPECT_NEAR tolerance.
66 float epsilon = 1e-3f;
67 float expected_out[] = {
// Topology: input_layout + two data nodes + batch_norm; the pass is expected
// to add one more primitive (a reorder), hence the count of 5 asserted below.
81 topology.add(input_layout("input", input.get_layout()));
82 topology.add(data("mean", mean));
83 topology.add(data("variance", variance));
84 topology.add(batch_norm("batch_norm", "input", "mean", "variance", epsilon));
86 network network(engine, topology); // without additional reorders we would get an exception here
87 network.set_input_data("input", input);
// 4 user primitives + 1 inserted reorder = 5 primitives total.
89 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(5));
91 auto outputs = network.execute();
93 auto output = outputs.at("batch_norm").get_memory().pointer<float>();
// Compare every element of the 2x2x3x2 output against the reference values.
94 for (int i = 0; i < 2 * 2 * 3 * 2; i++)
96 EXPECT_NEAR(expected_out[i], output[i], epsilon);
100 //concatenation of incompatible convolutions
// Checks that NO extra reorders are inserted when two convolutions producing
// different formats feed a concatenation, since concatenation accepts mixed
// input formats (see the comment before the primitive-count check below).
// optimize_data is explicitly disabled so the graph is not fused/optimized away.
// NOTE(review): this chunk appears to have lines elided (the `topology`
// variable declaration and closing braces) — verify against the full file.
101 TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
102 const auto& engine = get_test_engine();
103 build_options build_opt;
104 build_opt.set_option(build_option::optimize_data(false));
106 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
// The two weight buffers deliberately use different formats (yxfb vs byxf) so
// the two convolutions run on inputs of incompatible layouts.
107 auto weights1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
108 auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
110 set_values(input, { 1.1f, 1.2f, 1.3f, 1.4f });
111 set_values(weights1, { 2.1f, 3.1f});
112 set_values(weights2, { 1.1f, 0.1f});
115 topology.add(input_layout("input", input.get_layout()));
116 topology.add(data("weights1", weights1));
117 topology.add(data("weights2", weights2));
// conv1 consumes "input" directly; conv2 consumes an explicit user reorder to
// byxf, so the two convolution outputs reach concat in different formats.
119 topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }));
120 topology.add(cldnn::reorder("reorder", "input", cldnn::layout(data_types::f32, format::byxf, tensor(4))));
121 topology.add(cldnn::convolution("conv2", { "reorder" }, { "weights2" }));
123 topology.add(cldnn::concatenation("concat", { "conv1", "conv2" }, cldnn::concatenation::along_f));
125 network network(engine, topology, build_opt);
126 network.set_input_data("input", input);
128 //concatenation accepts inputs in different formats, so no reorders should be added here
// 7 = the seven user primitives added above (input, 2 weights, 2 convs,
// reorder, concat); nothing extra was inserted by the pass.
129 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(7));
130 auto outputs = network.execute();
132 float expected_out[] = { 6.34f, 1.34f, 6.86f, 1.46f };
133 float epsilon = 1e-3f;
// Validate the concatenated result (2 elements per convolution branch).
135 for (auto& it : outputs)
137 auto output = it.second.get_memory().pointer<float>();
138 for (size_t cntr = 0; cntr < 2 * 2; cntr++)
140 EXPECT_NEAR(expected_out[cntr], output[cntr], epsilon);
// Reference (host-side) implementation of the tile primitive used to produce
// expected outputs for the tests below: repeats the input `num_tiles` times
// along `axis` into the pre-allocated `output` memory.
// NOTE(review): this chunk appears to have lines elided (the innermost copy
// statement of the triple loop and closing braces) — verify against the full file.
145 template<typename data_t>
146 void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num_tiles)
// Splits the flattened tensor extents into (outer, inner) element counts at
// the tile axis: `outer` iterates dimensions before the axis, `inner` covers
// the axis itself and everything after it.
148 auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int>
152 case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
153 case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
154 case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]);
155 case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]);
156 case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]);
157 default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
// Mapped pointers must stay alive for the duration of the raw-pointer access.
161 const pointer<data_t> src = input.pointer<data_t>();
162 pointer<data_t> dst = output.pointer<data_t>();
164 const data_t* psrc = src.data();
165 data_t* pdst = dst.data();
167 auto sizes = get_sizes(input.get_layout().size, axis);
168 int outer_dim = sizes.first;
169 int inner_dim = sizes.second;
// For each outer slice, write the inner_dim-sized chunk num_tiles times.
171 for (int i = 0; i < outer_dim; i++)
173 for (int t = 0; t < num_tiles; t++)
175 for (int j = 0; j < inner_dim; j++)
// Checks that a reorder is inserted between reshape and tile: tile only
// supports bfyx input (see the comment before the primitive-count check), so
// the pass must add one reorder to make the byxf-shaped data consumable.
// Results are verified against the host-side tile_ref reference defined above.
// NOTE(review): this chunk appears to have lines elided (the `topology`
// variable declaration and closing braces), and the definition runs past the
// last visible line — verify against the full file.
185 TEST(add_reorders_gpu, basic_reshape_and_tile) {
186 const auto& engine = get_test_engine();
188 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 2, 1 } });
// output_ref is sized for the reshaped tensor tiled 4x along y.
189 auto output_ref = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 4, 2 } });
192 topology.add(input_layout("input", input.get_layout()));
193 topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
194 topology.add(tile("tile", "reshape", tile::along_y, 4));
196 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
197 set_values(input, input_vec);
// Produce the expected result with the reference implementation.
198 tile_ref<float>(input, output_ref, tile::along_y, 4);
200 network network(engine, topology);
201 network.set_input_data("input", input);
203 //reorder is required as tile accepts only bfyx format
// 3 user primitives (input, reshape, tile) + 1 inserted reorder = 4.
204 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(4));
205 auto outputs = network.execute();
207 auto output = outputs.at("tile").get_memory();
208 auto output_ptr = output.pointer<float>();
209 auto output_ref_ptr = output_ref.pointer<float>();
// Element-wise exact comparison against the reference output.
211 for (unsigned int i = 0; i < output_ref.count(); ++i) {
212 EXPECT_EQ(output_ptr[i], output_ref_ptr[i]);