2 // Copyright (c) 2019 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <gtest/gtest.h>
21 #include "program_impl.h"
23 #include "topology_impl.h"
24 #include "engine_impl.h"
25 #include "memory_impl.h"
26 #include "data_inst.h"
27 #include "activation_inst.h"
28 #include "convolution_inst.h"
29 #include "crop_inst.h"
30 #include "network_impl.h"
31 #include "reshape_inst.h"
32 #include "pass_manager.h"
34 #include "test_utils.h"
35 #include "program_impl_wrapper.h"
37 using namespace cldnn;
38 using namespace ::tests;
40 /* Basic test to show how the program can be built and run within internal tests
41 in a similar way as it is done in tests utilizing the clDNN API */
// NOTE(review): the TEST(...) header line and the scoping braces for this case
// were dropped from this extract; the statements below are the test body,
// built and executed entirely through the internal (impl) API.
43 const auto& engine = get_test_engine();
44 build_opt;
// Enable data optimization so the graph-level passes run on this program.
45 build_opt.set_option(build_option::optimize_data(true));
// f16 input and weights1; weights2 is f32/byxf, so mixed precision/format
// flows into the concat below.
47 auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
48 auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
49 auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
51 set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
52 set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
53 set_values(weights2, { 1.1f, 0.1f });
// Topology: reshape + reorder the f16 weights to f32, concatenate with the
// f32 weights along x, then use the result as convolution weights.
// NOTE(review): the `topology topology;` declaration line was dropped here.
56 topology.add(input_layout("input", input.get_layout()));
57 topology.add(data("weights1", weights1));
58 topology.add(data("weights2", weights2));
59 topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2))));
60 topology.add(reorder("reorder2", "input", layout(data_types::f32, format::byxf, 4)));
61 topology.add(reorder("reorder1", "reshape1", layout(data_types::f32, format::byxf, 4)));
62 topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x));
63 topology.add(convolution("conv2", { "reorder2" }, { "concat" }));
// Build and run via impl objects (api_cast) rather than the public cldnn API.
65 program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false);
66 cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
67 network network = api_cast(net.get());
69 network.set_input_data("input", input);
71 auto outputs = network.execute();
// Loose epsilon: inputs/weights go through f16, so exact comparison is not valid.
73 float epsilon = 1e-2f;
74 for (auto& it : outputs)
76 auto output = it.second.get_memory().pointer<float>();
77 EXPECT_NEAR(7.8f, output[0], epsilon);
82 This test creates a program without optimization passes; even the compilation is run manually.
83 Thus, a single method from program_impl, like add_intermediate, might be tested separately.
85 TEST(add_intermediate_gpu, test1)
// NOTE(review): the opening brace and several declaration lines (engine,
// topology, pass_manager) were dropped from this extract; the code below is
// otherwise verbatim.
87 build_options build_opt;
// Two 2x2x2x2 f32 buffers (input and first-level weights) plus 2x2x1x1
// weights for the second-level convolution.
91 auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
92 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
93 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
95 set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
96 (2.1f), (2.2f), (2.3f), (2.4f),
97 (3.1f), (3.2f), (3.3f), (3.4f),
98 (4.1f), (4.2f), (4.3f), (4.4f) });
99 set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
100 (2.5f), (2.6f), (2.7f), (2.8f),
101 (3.5f), (3.6f), (3.7f), (3.8f),
102 (4.5f), (4.6f), (4.7f), (4.8f) });
104 set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
105 topology.add(input_layout("input", input.get_layout()));
106 topology.add(data("weights", weights));
107 topology.add(data("weights2", weights2));
// conv2a consumes conv1a, so the "input" -> "conv1a" edge (dependency 0) is
// the one mutated by add_intermediate below.
108 topology.add(cldnn::convolution("conv1a", { "input" }, { "weights" }));
109 topology.add(cldnn::convolution("conv1b", { "input" }, { "weights" }));
110 topology.add(cldnn::convolution("conv2a", { "conv1a" }, { "weights2" }));
// Reorder primitive created directly; "nothing" appears to be a placeholder
// input id that add_intermediate rewires — TODO confirm against program_impl.
111 auto new_reorder = std::make_shared<reorder>("reorder","nothing", input.get_layout());
// Per the file comment above: build WITHOUT running optimization passes, so
// the graph can be mutated before it is compiled.
112 program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
113 prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0);
114 prog->dump_program("custom_dump", true);
// Compilation is run manually as a single pass after the graph mutation.
// NOTE(review): the `pass_manager pm;` declaration line was dropped here.
117 compile_graph compile_graph_pass;
118 pm.run(*prog, compile_graph_pass);
120 cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
121 network network = api_cast(net.get());
122 network.set_input_data("input", input);
123 auto outputs = network.execute();
// Eight expected values: presumably four per network output — verify which
// primitives end up as outputs once the dropped lines are restored.
125 std::vector<float> expected_output_vec = {
126 32.2f, 60.2f, 66.6f, 126.6f,
127 514.22f, 532.7f, 1075.26f, 1113.9f
130 uint32_t output_size = 4;
// NOTE(review): no visible increment of output_index in this extract — the
// line was likely dropped; confirm against the full file.
131 uint32_t output_index = 0;
132 for (auto& it : outputs)
134 auto output = it.second.get_memory().pointer<float>();
135 for (uint32_t x = 0; x < output_size; x++)
137 EXPECT_FLOAT_EQ(expected_output_vec[x+output_size*output_index], output[x]);
143 /* This test shows how to use private members (here: add_connection) of program_impl using program_impl_wrapper */
144 TEST(add_intermediate_gpu, test2)
// NOTE(review): the opening brace and several declaration lines (engine,
// topology, pass_manager) were dropped from this extract; the code below is
// otherwise verbatim.
146 build_options build_opt;
// Same input/weight data as test1; here the first-level convolution node is
// created and connected directly through impl APIs instead of the topology.
150 auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
151 auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
152 auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
154 set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
155 (2.1f), (2.2f), (2.3f), (2.4f),
156 (3.1f), (3.2f), (3.3f), (3.4f),
157 (4.1f), (4.2f), (4.3f), (4.4f) });
158 set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
159 (2.5f), (2.6f), (2.7f), (2.8f),
160 (3.5f), (3.6f), (3.7f), (3.8f),
161 (4.5f), (4.6f), (4.7f), (4.8f) });
163 set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
165 topology.add(input_layout("input", input.get_layout()));
166 topology.add(data("weights2", weights2));
// Note: "weights" is deliberately NOT added to the topology — its data node
// is created below and attached via program_impl_wrapper::add_connection.
168 topology.add(cldnn::convolution("conv2a", { "input" }, { "weights2" }));
169 topology.add(cldnn::convolution("conv2b", { "input" }, { "weights2" }));
// Build "conv1a" by hand: a convolution primitive referencing the not-yet-
// connected "weights" id, plus the data node that will back it.
171 std::vector<primitive_id> w_vec;
172 w_vec.push_back("weights");
173 auto new_conv = std::make_shared<convolution>("conv1a", "input", w_vec);
174 auto weights_node = std::make_shared<data>("weights", weights);
// Per the file comment above: build without optimization passes so the graph
// can still be mutated.
175 program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
// Insert conv1a on conv2a's dependency 0, then wire the weights data node to
// it through the private add_connection (exposed by program_impl_wrapper).
177 prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true);
178 program_impl_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv));
179 prog->dump_program("custom_dump", true);
// Compilation is run manually as a single pass after the graph mutation.
// NOTE(review): the `pass_manager pm;` declaration line was dropped here.
182 compile_graph compile_graph_pass;
183 pm.run(*prog, compile_graph_pass);
185 cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
186 network network = api_cast(net.get());
187 network.set_input_data("input", input);
188 auto outputs = network.execute();
// Every output is compared against the same four expected values.
190 std::vector<float> expected_output_vec = {
191 514.22f, 532.7f, 1075.26f, 1113.9f
194 uint32_t output_size = 4;
195 for (auto& it : outputs)
197 auto output = it.second.get_memory().pointer<float>();
198 for (uint32_t x = 0; x < output_size; x++)
200 EXPECT_FLOAT_EQ(expected_output_vec[x], output[x]);