2 // Copyright (c) 2018 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include <gtest/gtest.h>
19 #include "api/memory.hpp"
20 #include <api/input_layout.hpp>
21 #include <api/topology.hpp>
22 #include <api/network.hpp>
23 #include <api/engine.hpp>
24 #include "test_utils/test_utils.h"
25 #include <api/reorder.hpp>
26 #include <api/data.hpp>
27 #include <api/activation.hpp>
28 #include <api/mutable_data.hpp>
29 #include <api/layout.hpp>
30 #include <api/tile.hpp>
31 #include <api/reshape.hpp>
33 #include <api/batch_norm.hpp>
34 #include <api/concatenation.hpp>
36 using namespace cldnn;
37 using namespace tests;
40 These tests are intended to check if additional reorders are being added properly during
41 add_reorders optimization pass.
44 //Input has incompatible format
// Checks that the add_reorders pass inserts a reorder when a primitive's input
// format is not supported by its implementation: the input is allocated in
// fyxb, which batch_norm cannot consume directly (see the inline note below).
// NOTE(review): this chunk appears to have lines elided (the set_values call
// for `input`, the `expected_out` initializer values, the `topology` variable
// declaration, and closing braces) — verify against the full file.
45 TEST(add_reorders_gpu, basic1) {
46 const auto& engine = get_test_engine();
48 auto input = memory::allocate(engine, { data_types::f32, format::fyxb,{ 2, 2, 3, 2 } }); //format unsupported by batch_norm!
// Per-feature mean/variance buffers (2 features) used by batch_norm.
49 auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
50 auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
63 set_values(mean, { 0.1f, 0.2f });
64 set_values(variance, { 0.4f, 0.5f });
// epsilon doubles as both the batch_norm epsilon and the EXPECT_NEAR tolerance.
66 float epsilon = 1e-3f;
67 float expected_out[] = {
// Topology: input_layout + two data nodes + batch_norm; the pass is expected
// to add one more primitive (a reorder), hence the count of 5 asserted below.
81 topology.add(input_layout("input", input.get_layout()));
82 topology.add(data("mean", mean));
83 topology.add(data("variance", variance));
84 topology.add(batch_norm("batch_norm", "input", "mean", "variance", epsilon));
86 network network(engine, topology); // without additional reorders we would get an exception here
87 network.set_input_data("input", input);
// 4 user primitives + 1 inserted reorder = 5 primitives total.
89 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(5));
91 auto outputs = network.execute();
93 auto output = outputs.at("batch_norm").get_memory().pointer<float>();
// Compare every element of the 2x2x3x2 output against the reference values.
94 for (int i = 0; i < 2 * 2 * 3 * 2; i++)
96 EXPECT_NEAR(expected_out[i], output[i], epsilon);
100 //concatenation of incompatible convolutions
// Checks that NO extra reorders are inserted when two convolutions producing
// different formats feed a concatenation, since concatenation accepts mixed
// input formats (see the comment before the primitive-count check below).
// optimize_data is explicitly disabled so the graph is not fused/optimized away.
// NOTE(review): this chunk appears to have lines elided (the `topology`
// variable declaration and closing braces) — verify against the full file.
101 TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
102 const auto& engine = get_test_engine();
103 build_options build_opt;
104 build_opt.set_option(build_option::optimize_data(false));
106 auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
// The two weight buffers deliberately use different formats (yxfb vs byxf) so
// the two convolutions run on inputs of incompatible layouts.
107 auto weights1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
108 auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
110 set_values(input, { 1.1f, 1.2f, 1.3f, 1.4f });
111 set_values(weights1, { 2.1f, 3.1f});
112 set_values(weights2, { 1.1f, 0.1f});
115 topology.add(input_layout("input", input.get_layout()));
116 topology.add(data("weights1", weights1));
117 topology.add(data("weights2", weights2));
// conv1 consumes "input" directly; conv2 consumes an explicit user reorder to
// byxf, so the two convolution outputs reach concat in different formats.
119 topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }));
120 topology.add(cldnn::reorder("reorder", "input", cldnn::layout(data_types::f32, format::byxf, tensor(4))));
121 topology.add(cldnn::convolution("conv2", { "reorder" }, { "weights2" }));
123 topology.add(cldnn::concatenation("concat", { "conv1", "conv2" }, cldnn::concatenation::along_f));
125 network network(engine, topology, build_opt);
126 network.set_input_data("input", input);
128 //concatenation accepts inputs in different formats, so no reorders should be added here
// 7 = the seven user primitives added above (input, 2 weights, 2 convs,
// reorder, concat); nothing extra was inserted by the pass.
129 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(7));
130 auto outputs = network.execute();
132 float expected_out[] = { 6.34f, 1.34f, 6.86f, 1.46f };
133 float epsilon = 1e-3f;
// Validate the concatenated result (2 elements per convolution branch).
135 for (auto& it : outputs)
137 auto output = it.second.get_memory().pointer<float>();
138 for (size_t cntr = 0; cntr < 2 * 2; cntr++)
140 EXPECT_NEAR(expected_out[cntr], output[cntr], epsilon);
// Reference (host-side) implementation of the tile primitive used to produce
// expected outputs for the tests below: repeats the input `num_tiles` times
// along `axis` into the pre-allocated `output` memory.
// NOTE(review): this chunk appears to have lines elided (the innermost copy
// statement of the triple loop and closing braces) — verify against the full file.
145 template<typename data_t>
146 void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num_tiles)
// Splits the flattened tensor extents into (outer, inner) element counts at
// the tile axis: `outer` iterates dimensions before the axis, `inner` covers
// the axis itself and everything after it.
148 auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int>
152 case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
153 case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[2] * size.spatial[1] * size.spatial[0]);
154 case tile::along_z: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[2] * size.spatial[1] * size.spatial[0]);
155 case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2], size.spatial[1] * size.spatial[0]);
156 case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[2] * size.spatial[1], size.spatial[0]);
157 default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
// Mapped pointers must stay alive for the duration of the raw-pointer access.
161 const pointer<data_t> src = input.pointer<data_t>();
162 pointer<data_t> dst = output.pointer<data_t>();
164 const data_t* psrc = src.data();
165 data_t* pdst = dst.data();
167 auto sizes = get_sizes(input.get_layout().size, axis);
168 int outer_dim = sizes.first;
169 int inner_dim = sizes.second;
// For each outer slice, write the inner_dim-sized chunk num_tiles times.
171 for (int i = 0; i < outer_dim; i++)
173 for (int t = 0; t < num_tiles; t++)
175 for (int j = 0; j < inner_dim; j++)
// Checks that a reorder is inserted between reshape and tile: tile only
// supports bfyx input (see the comment before the primitive-count check), so
// the pass must add one reorder to make the byxf-shaped data consumable.
// Results are verified against the host-side tile_ref reference defined above.
// NOTE(review): this chunk appears to have lines elided (the `topology`
// variable declaration and closing braces), and the definition runs past the
// last visible line — verify against the full file.
185 TEST(add_reorders_gpu, basic_reshape_and_tile) {
186 const auto& engine = get_test_engine();
188 auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 2, 1 } });
// output_ref is sized for the reshaped tensor tiled 4x along y.
189 auto output_ref = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 4, 2 } });
192 topology.add(input_layout("input", input.get_layout()));
193 topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
194 topology.add(tile("tile", "reshape", tile::along_y, 4));
196 std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
197 set_values(input, input_vec);
// Produce the expected result with the reference implementation.
198 tile_ref<float>(input, output_ref, tile::along_y, 4);
200 network network(engine, topology);
201 network.set_input_data("input", input);
203 //reorder is required as tile accepts only bfyx format
// 3 user primitives (input, reshape, tile) + 1 inserted reorder = 4.
204 EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(4));
205 auto outputs = network.execute();
207 auto output = outputs.at("tile").get_memory();
208 auto output_ptr = output.pointer<float>();
209 auto output_ref_ptr = output_ref.pointer<float>();
// Element-wise exact comparison against the reference output.
211 for (unsigned int i = 0; i < output_ref.count(); ++i) {
212 EXPECT_EQ(output_ptr[i], output_ref_ptr[i]);