inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp

   1 /*
   2 // Copyright (c) 2019 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 #include <memory>
  18
  19 #include <gtest/gtest.h>
  20
  21 #include "program_impl.h"
  22 #include "api_impl.h"
  23 #include "topology_impl.h"
  24 #include "engine_impl.h"
  25 #include "memory_impl.h"
  26 #include "data_inst.h"
  27 #include "activation_inst.h"
  28 #include "convolution_inst.h"
  29 #include "crop_inst.h"
  30 #include "network_impl.h"
  31 #include "reshape_inst.h"
  32 #include "pass_manager.h"
  33
  34 #include "test_utils.h"
  35 #include "program_impl_wrapper.h"
  36
  37 using namespace cldnn;
  38 using namespace ::tests;
  39
  40 /* Basic test to show how the program can be build and run within internal tests
  41    in similar way as it is done in tests utilizing clDNN API */
  42 TEST(basic, test1) {
  43     const auto& engine = get_test_engine();
  44     build_options build_opt;
  45     build_opt.set_option(build_option::optimize_data(true));
  46
  47     auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
  48     auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
  49     auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
  50
  51     set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
  52     set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
  53     set_values(weights2, { 1.1f, 0.1f });
  54
  55     topology topology;
  56     topology.add(input_layout("input", input.get_layout()));
  57     topology.add(data("weights1", weights1));
  58     topology.add(data("weights2", weights2));
  59     topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2))));
  60     topology.add(reorder("reorder2", "input", layout(data_types::f32, format::byxf, 4)));
  61     topology.add(reorder("reorder1", "reshape1", layout(data_types::f32, format::byxf, 4)));
  62     topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x));
  63     topology.add(convolution("conv2", { "reorder2" }, { "concat" }));
  64
  65     program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false);
  66     cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
  67     network network = api_cast(net.get());
  68
  69     network.set_input_data("input", input);
  70
  71     auto outputs = network.execute();
  72
  73     float epsilon = 1e-2f;
  74     for (auto& it : outputs)
  75     {
  76         auto output = it.second.get_memory().pointer<float>();
  77         EXPECT_NEAR(7.8f, output[0], epsilon);
  78     }
  79 }
  80
  81 /*
  82     This test creates a program without optimization passes, even the compilation is being run manualy.
  83     Thus, a single method from program_impl like add_intermediate might be tested separately.
  84 */
  85 TEST(add_intermediate_gpu, test1)
  86 {
  87     build_options build_opt;
  88     topology topology;
  89     engine engine;
  90
  91     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
  92     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
  93     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
  94
  95     set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
  96                         (2.1f), (2.2f), (2.3f), (2.4f),
  97                         (3.1f), (3.2f), (3.3f), (3.4f),
  98                         (4.1f), (4.2f), (4.3f), (4.4f) });
  99     set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
 100                           (2.5f), (2.6f), (2.7f), (2.8f),
 101                           (3.5f), (3.6f), (3.7f), (3.8f),
 102                           (4.5f), (4.6f), (4.7f), (4.8f) });
 103
 104     set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
 105     topology.add(input_layout("input", input.get_layout()));
 106     topology.add(data("weights", weights));
 107     topology.add(data("weights2", weights2));
 108     topology.add(cldnn::convolution("conv1a", { "input" }, { "weights" }));
 109     topology.add(cldnn::convolution("conv1b", { "input" }, { "weights" }));
 110     topology.add(cldnn::convolution("conv2a", { "conv1a" }, { "weights2" }));
 111     auto new_reorder = std::make_shared<reorder>("reorder","nothing", input.get_layout());
 112     program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
 113     prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0);
 114     prog->dump_program("custom_dump", true);
 115
 116     pass_manager pm;
 117     compile_graph compile_graph_pass;
 118     pm.run(*prog, compile_graph_pass);
 119
 120     cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
 121     network network = api_cast(net.get());
 122     network.set_input_data("input", input);
 123     auto outputs = network.execute();
 124
 125     std::vector<float> expected_output_vec = {
 126         32.2f, 60.2f, 66.6f, 126.6f,
 127         514.22f, 532.7f, 1075.26f, 1113.9f
 128     };
 129
 130     uint32_t output_size = 4;
 131     uint32_t output_index = 0;
 132     for (auto& it : outputs)
 133     {
 134         auto output = it.second.get_memory().pointer<float>();
 135         for (uint32_t x = 0; x < output_size; x++)
 136         {
 137             EXPECT_FLOAT_EQ(expected_output_vec[x+output_size*output_index], output[x]);
 138         }
 139         output_index++;
 140     }
 141 }
 142
 143 /* This test shows how to use private members (here: add_connection) of program_impl using program_impl_wraper */
 144 TEST(add_intermediate_gpu, test2)
 145 {
 146     build_options build_opt;
 147     topology topology;
 148     engine engine;
 149
 150     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
 151     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
 152     auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
 153
 154     set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
 155         (2.1f), (2.2f), (2.3f), (2.4f),
 156         (3.1f), (3.2f), (3.3f), (3.4f),
 157         (4.1f), (4.2f), (4.3f), (4.4f) });
 158     set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
 159         (2.5f), (2.6f), (2.7f), (2.8f),
 160         (3.5f), (3.6f), (3.7f), (3.8f),
 161         (4.5f), (4.6f), (4.7f), (4.8f) });
 162
 163     set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
 164
 165     topology.add(input_layout("input", input.get_layout()));
 166     topology.add(data("weights2", weights2));
 167
 168     topology.add(cldnn::convolution("conv2a", { "input" }, { "weights2" }));
 169     topology.add(cldnn::convolution("conv2b", { "input" }, { "weights2" }));
 170
 171     std::vector<primitive_id> w_vec;
 172     w_vec.push_back("weights");
 173     auto new_conv = std::make_shared<convolution>("conv1a", "input", w_vec);
 174     auto weights_node = std::make_shared<data>("weights", weights);
 175     program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
 176
 177     prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true);
 178     program_impl_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv));
 179     prog->dump_program("custom_dump", true);
 180
 181     pass_manager pm;
 182     compile_graph compile_graph_pass;
 183     pm.run(*prog, compile_graph_pass);
 184
 185     cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
 186     network network = api_cast(net.get());
 187     network.set_input_data("input", input);
 188     auto outputs = network.execute();
 189
 190     std::vector<float> expected_output_vec = {
 191         514.22f, 532.7f, 1075.26f, 1113.9f
 192     };
 193
 194     uint32_t output_size = 4;
 195     for (auto& it : outputs)
 196     {
 197         auto output = it.second.get_memory().pointer<float>();
 198         for (uint32_t x = 0; x < output_size; x++)
 199         {
 200             EXPECT_FLOAT_EQ(expected_output_vec[x], output[x]);
 201         }
 202     }
 203 }