1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
9 #include "test_graph.hpp"
11 #include "single_layer_common.hpp"
12 #include <mkldnn_plugin/mkldnn_extension_utils.h>
13 #include "tests_common.hpp"
15 using namespace ::testing;
17 using namespace mkldnn;
// Parameters of a single dw_conv_fusing test case.
// Elsewhere in this file it is read through the members `in` (n, c, h, w) and
// `conv1` / `conv2` (krn_w, krn_h, str_w, str_h, pad_w, pad_h, out_c, grp_c).
30 struct dw_conv_fusing_test_params {
// Reference (naive) grouped convolution with bias, followed by a
// negative-slope activation. Used to compute expected values for the test.
//
// src            - input blob; IC, IH and IW are read from src.dims()[1], [2], [3].
// weights        - kernel values immediately followed by OC bias values.
// weightsSize    - element count of `weights`; asserted to equal
//                  KW * KH * OC * IC / GC + OC.
// dst            - output blob, written through dst.data().
// prm            - kernel size, stride, padding, output channel count and group count.
// negative_slope - factor applied to outputs that end up negative.
//
// NOTE(review): no loop over the batch dimension is visible and the index
// math has no batch term, so this appears to process one image only - confirm.
42 template <typename data_t>
43 void ref_conv(const InferenceEngine::TBlob<data_t> &src, const data_t *weights, const size_t weightsSize,
44 InferenceEngine::TBlob<data_t> &dst, conv_params prm, float negative_slope) {
45 size_t KW = prm.krn_w;
46 size_t KH = prm.krn_h;
47 size_t GC = prm.grp_c;
49 size_t IC = src.dims()[1];
50 size_t IH = src.dims()[2];
51 size_t IW = src.dims()[3];
// Output spatial size: (in + 2 * pad - kernel) / stride + 1, integer division.
53 size_t OW = (IW + 2 * prm.pad_w - prm.krn_w) / prm.str_w + 1;
54 size_t OH = (IH + 2 * prm.pad_h - prm.krn_h) / prm.str_h + 1;
55 size_t OC = prm.out_c;
57 const data_t *src_data = src.readOnly();
58 const data_t *weights_data = weights;
// Biases live in the same buffer, right after the KW * KH * OC * IC / GC weights.
59 const data_t *bias_data = weights_data + KW * KH * OC * IC / GC;
60 data_t *dst_data = dst.data();
// Sanity check: the buffer must hold exactly the weights plus one bias per output channel.
62 IE_ASSERT(KW * KH * OC * IC / GC + OC == weightsSize);
// Iterate per group, then per output channel inside the group, then per output pixel.
64 for (uint32_t g = 0; g < GC; g++) {
65 for (uint32_t oc = 0; oc < OC / GC; oc++) {
66 for (uint32_t oh = 0; oh < OH; oh++) {
67 for (uint32_t ow = 0; ow < OW; ow++) {
68 size_t oidx = g * OC / GC * OH * OW
69 + oc * OH * OW + oh * OW + ow;
// Each output element starts from the bias of its (absolute) output channel.
70 dst_data[oidx] = bias_data[g * OC / GC + oc];
72 for (size_t ic = 0; ic < IC / GC; ic++) {
73 for (size_t kh = 0; kh < KH; kh++) {
74 for (size_t kw = 0; kw < KW; kw++) {
// Input coordinates for this kernel tap; signed because padding can push them below zero.
75 int32_t iw = ow * prm.str_w - prm.pad_w + kw;
76 int32_t ih = oh * prm.str_h - prm.pad_h + kh;
// Test whether the tap falls outside the input extent (i.e. into the padding).
77 if (iw < 0 || iw >= (int32_t)IW || ih < 0
80 size_t iidx = g * IC / GC * IH * IW
81 + ic * IH * IW + ih * IW + iw;
// Weights are addressed as [group][oc][ic][kh][kw].
82 size_t widx = g * OC / GC * IC / GC * KH * KW
83 + oc * IC / GC * KH * KW
84 + ic * KH * KW + kh * KW + kw;
86 dst_data[oidx] += src_data[iidx] * weights_data[widx];
// Activation: negative sums are scaled by negative_slope (a slope of 0 clamps them to zero).
91 if (dst_data[oidx] < 0)
92 dst_data[oidx] *= negative_slope;
// Value-parameterized test fixture: derives from TestsCommon and receives a
// dw_conv_fusing_test_params instance through WithParamInterface.
99 class MKLDNNGraphDWConvFusingTests: public TestsCommon,
100 public WithParamInterface<dw_conv_fusing_test_params> {
// Network template for the test: in1 -> conv1 -> relu1 -> conv2 -> relu2
// (layer ids 0..4, wired by the <edge> entries). Tokens such as _C1_KW_ or
// _C2_S0_ are placeholders that getModel() replaces with concrete numbers.
// Both ReLU layers use negative_slope="0".
101 std::string model_t = R"V0G0N(
102 <Net Name="Convolution_Only" version="2" precision="FP32" batch="1">
104 <layer name="in1" type="Input" precision="FP32" id="0">
114 <layer name="conv1" id="1" type="Convolution" precision="FP32">
115 <convolution stride-x="_C1_SW_" stride-y="_C1_SH_"
116 pad-x="_C1_PW_" pad-y="_C1_PH_"
117 kernel-x="_C1_KW_" kernel-y="_C1_KH_"
118 output="_C1_OC_" group="_C1_GC_"/>
120 <weights offset="0" size="_C1_S1_" />
121 <biases offset="_C1_S1_" size="_C1_S2_" />
139 <layer name="relu1" id="2" type="ReLU" precision="FP32">
140 <data negative_slope="0"/>
158 <layer name="conv2" id="3" type="Convolution" precision="FP32">
159 <convolution stride-x="_C2_SW_" stride-y="_C2_SH_"
160 pad-x="_C2_PW_" pad-y="_C2_PH_"
161 kernel-x="_C2_KW_" kernel-y="_C2_KH_"
162 output="_C2_OC_" group="_C2_GC_"/>
164 <weights offset="_C2_S0_" size="_C2_S1_" />
165 <biases offset="_C2_S2_" size="_C2_S3_" />
183 <layer name="relu2" id="4" type="ReLU" precision="FP32">
184 <data negative_slope="0"/>
204 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
205 <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
206 <edge from-layer="2" from-port="4" to-layer="3" to-port="5"/>
207 <edge from-layer="3" from-port="6" to-layer="4" to-port="7"/>
// Produces the network description for test case `p`: starts from a copy of
// model_t and fills in every placeholder token with a number derived from `p`.
// REPLACE_WITH_NUM is defined outside this file; presumably it substitutes the
// token with the decimal value - verify against its definition.
212 std::string getModel(dw_conv_fusing_test_params p) {
213 std::string model = model_t;
// Input tensor shape.
214 REPLACE_WITH_NUM(model, "_IW_", p.in.w);
215 REPLACE_WITH_NUM(model, "_IH_", p.in.h);
216 REPLACE_WITH_NUM(model, "_IC_", p.in.c);
217 REPLACE_WITH_NUM(model, "_IN_", p.in.n);
// conv1 attributes: kernel, stride, padding, group count, output channels.
219 REPLACE_WITH_NUM(model, "_C1_KW_", p.conv1.krn_w);
220 REPLACE_WITH_NUM(model, "_C1_KH_", p.conv1.krn_h);
221 REPLACE_WITH_NUM(model, "_C1_SW_", p.conv1.str_w);
222 REPLACE_WITH_NUM(model, "_C1_SH_", p.conv1.str_h);
223 REPLACE_WITH_NUM(model, "_C1_PW_", p.conv1.pad_w);
224 REPLACE_WITH_NUM(model, "_C1_PH_", p.conv1.pad_h);
225 REPLACE_WITH_NUM(model, "_C1_GC_", p.conv1.grp_c);
226 REPLACE_WITH_NUM(model, "_C1_OC_", p.conv1.out_c);
// conv1 output spatial size: (in + 2 * pad - kernel) / stride + 1.
227 size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1;
228 size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1;
229 REPLACE_WITH_NUM(model, "_C1_OH_", c1_oh);
230 REPLACE_WITH_NUM(model, "_C1_OW_", c1_ow);
// Byte sizes of conv1 weights and biases (element count * sizeof(float)).
232 size_t conv1_w_data_size = (p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c) * sizeof(float);
233 size_t conv1_b_data_size = p.conv1.out_c * sizeof(float);
234 REPLACE_WITH_NUM(model, "_C1_S1_", conv1_w_data_size);
235 REPLACE_WITH_NUM(model, "_C1_S2_", conv1_b_data_size);
// conv2 attributes.
237 REPLACE_WITH_NUM(model, "_C2_KW_", p.conv2.krn_w);
238 REPLACE_WITH_NUM(model, "_C2_KH_", p.conv2.krn_h);
239 REPLACE_WITH_NUM(model, "_C2_SW_", p.conv2.str_w);
240 REPLACE_WITH_NUM(model, "_C2_SH_", p.conv2.str_h);
241 REPLACE_WITH_NUM(model, "_C2_PW_", p.conv2.pad_w);
242 REPLACE_WITH_NUM(model, "_C2_PH_", p.conv2.pad_h);
243 REPLACE_WITH_NUM(model, "_C2_GC_", p.conv2.grp_c);
244 REPLACE_WITH_NUM(model, "_C2_OC_", p.conv2.out_c);
// conv2 output size is computed from conv1's output size, not from the network input.
245 REPLACE_WITH_NUM(model, "_C2_OH_", (c1_oh + 2 * p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1);
246 REPLACE_WITH_NUM(model, "_C2_OW_", (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1);
// conv2's input channel count is conv1's out_c.
248 size_t conv2_w_data_size = (p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c) * sizeof(float);
249 size_t conv2_b_data_size = p.conv2.out_c * sizeof(float);
// Byte layout of the weights blob:
//   [conv1 weights][conv1 biases][conv2 weights][conv2 biases]
// _C2_S0_ is the byte offset of conv2 weights, _C2_S2_ the byte offset of conv2 biases.
250 REPLACE_WITH_NUM(model, "_C2_S0_", conv1_w_data_size + conv1_b_data_size);
251 REPLACE_WITH_NUM(model, "_C2_S1_", conv2_w_data_size);
252 REPLACE_WITH_NUM(model, "_C2_S2_", conv1_w_data_size + conv1_b_data_size + conv2_w_data_size);
253 REPLACE_WITH_NUM(model, "_C2_S3_", conv2_b_data_size);
// Fixture tear-down hook; presumably overrides the one inherited from the test base - confirm.
258 virtual void TearDown() {
// Runs the whole scenario for the current parameter set: builds the network
// from getModel(), fills weights and input with generated data, infers through
// MKLDNNGraphTestClass, then compares the first network output against two
// chained ref_conv() calls.
261 virtual void SetUp() {
263 TestsCommon::SetUp();
264 dw_conv_fusing_test_params p = ::testing::WithParamInterface<dw_conv_fusing_test_params>::GetParam();
265 std::string model = getModel(p);
267 InferenceEngine::CNNNetReader net_reader;
268 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Element counts (floats, not bytes) of each convolution's weights plus biases.
270 size_t conv1_w_size = p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c + p.conv1.out_c; // conv1 weights + biases
271 size_t conv2_w_size = p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c + p.conv2.out_c; // conv2 weights + biases
// One U8 blob sized in bytes to hold both convolutions' float parameters back to back.
273 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {(conv1_w_size+conv2_w_size) * sizeof(float)});
// The byte buffer is filled as an array of floats.
275 fill_data((float *) weights->buffer(), weights->size() / sizeof(float), 1);
// Ptr is presumably a shared_ptr that takes ownership of the raw `weights` pointer - confirm.
// The raw pointer is still used below to hand the same buffer to ref_conv().
276 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
278 net_reader.SetWeights(weights_ptr);
280 MKLDNNGraphTestClass graph;
281 graph.CreateGraph(net_reader.getNetwork());
283 InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w};
285 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, InferenceEngine::NCHW, dims_src);
287 fill_data(src->buffer(), src->size());
// ref_conv() takes a TBlob<float>, so obtain the typed view of the input blob.
289 auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
291 if (srcPtr == nullptr)
292 FAIL() << "Cannot cast blob to TBlob<float>.";
// Inputs are keyed by layer name; "in1" matches the Input layer in model_t.
294 InferenceEngine::BlobMap srcs;
295 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src));
297 InferenceEngine::OutputsDataMap out;
298 out = net_reader.getNetwork().getOutputsInfo();
299 InferenceEngine::BlobMap outputBlobs;
// Only the first entry of the outputs map is used.
301 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
303 InferenceEngine::TBlob<float>::Ptr output;
304 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
306 outputBlobs[item.first] = output;
308 graph.Infer(srcs, outputBlobs);
// Reference path: recompute the intermediate and final output sizes with the
// same formula getModel() uses.
310 size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1;
311 size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1;
// NOTE(review): dims are listed as {W, H, C, N} while the layout argument is NCHW
// (dims_src above uses {N, C, H, W}) - confirm this matches what TensorDesc and
// ref_conv()'s src.dims() indexing expect.
312 InferenceEngine::TBlob<float> conv1_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c1_ow, c1_oh, p.conv1.out_c, p.in.n}, InferenceEngine::NCHW));
313 conv1_dst_ref.allocate();
315 size_t c2_oh = (c1_oh + 2 * p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1;
316 size_t c2_ow = (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1;
317 InferenceEngine::TBlob<float> conv2_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c2_ow, c2_oh, p.conv2.out_c, p.in.n}, InferenceEngine::NCHW));
318 conv2_dst_ref.allocate();
// conv1 then conv2, each with negative_slope 0.0f to mirror relu1 / relu2 in the model.
// conv2's parameters start conv1_w_size floats into the shared weights buffer.
320 ref_conv(*srcPtr, (const float *)weights->buffer(), conv1_w_size, conv1_dst_ref, p.conv1, 0.0f);
321 ref_conv(conv1_dst_ref, (const float *)weights->buffer() + conv1_w_size, conv2_w_size, conv2_dst_ref, p.conv2, 0.0f);
// Check the plugin's output against the reference result.
324 compare(*output, conv2_dst_ref);
// Handler for Inference Engine exceptions raised by the steps above.
325 } catch (const InferenceEngine::details::InferenceEngineException &e) {
// Intentionally empty test body: the scenario and its checks run in the fixture's SetUp().
331 TEST_P(MKLDNNGraphDWConvFusingTests, TestsDwConvFusing) {}
// Instantiates the parameterized test with one case: input {1, 32, 160, 320}
// (n, c, h, w as used for dims_src), then the conv1 and conv2 settings.
// Assumes conv_params is ordered krn_w, krn_h, str_w, str_h, pad_w, pad_h,
// out_c, grp_c - TODO confirm against its declaration. Under that assumption
// conv1 is a 1x1 convolution with 24 outputs and conv2 is a 3x3 convolution
// with 24 outputs in 24 groups.
333 INSTANTIATE_TEST_CASE_P(
334 TestsDwConvFusing, MKLDNNGraphDWConvFusingTests,
336 dw_conv_fusing_test_params{{1, 32, 160, 320}, {1, 1, 1, 1, 0, 0, 24, 1}, {3, 3, 1, 1, 1, 1, 24, 24}}