Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / tests / unit / engines / mkldnn / graph / structure / graph_dw_conv_fusing_test.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
8
9 #include "test_graph.hpp"
10
11 #include "single_layer_common.hpp"
12 #include <mkldnn_plugin/mkldnn_extension_utils.h>
13 #include "tests_common.hpp"
14
15 using namespace ::testing;
16 using namespace std;
17 using namespace mkldnn;
18
// Hyper-parameters of one convolution layer; used both to instantiate the
// IR model (via getModel()) and to drive the naive reference implementation.
struct conv_params {
    size_t krn_w;   // kernel width
    size_t krn_h;   // kernel height
    size_t str_w;   // stride along width
    size_t str_h;   // stride along height
    size_t pad_w;   // zero-padding along width (same amount on both sides)
    size_t pad_h;   // zero-padding along height (same amount on both sides)
    size_t out_c;   // number of output channels
    size_t grp_c;   // number of groups (grp_c == out_c => depthwise conv)
};
29
// One test case: input tensor geometry plus the two chained convolutions
// of the conv1 -> ReLU -> conv2 -> ReLU pattern under test.
struct dw_conv_fusing_test_params {
    // Input blob dimensions (batch, channels, height, width).
    struct {
        size_t n;
        size_t c;
        size_t h;
        size_t w;
    } in;

    conv_params conv1;  // first (regular) convolution
    conv_params conv2;  // second convolution — the depthwise fusion candidate
};
41
42 template <typename data_t>
43 void ref_conv(const InferenceEngine::TBlob<data_t> &src, const data_t *weights, const size_t weightsSize,
44               InferenceEngine::TBlob<data_t> &dst, conv_params prm, float negative_slope) {
45     size_t KW = prm.krn_w;
46     size_t KH = prm.krn_h;
47     size_t GC = prm.grp_c;
48
49     size_t IC = src.dims()[1];
50     size_t IH = src.dims()[2];
51     size_t IW = src.dims()[3];
52
53     size_t OW = (IW + 2 * prm.pad_w - prm.krn_w) / prm.str_w + 1;
54     size_t OH = (IH + 2 * prm.pad_h - prm.krn_h) / prm.str_h + 1;
55     size_t OC = prm.out_c;
56
57     const data_t *src_data = src.readOnly();
58     const data_t *weights_data = weights;
59     const data_t *bias_data = weights_data + KW * KH * OC * IC / GC;
60     data_t *dst_data = dst.data();
61
62     IE_ASSERT(KW * KH * OC * IC / GC + OC == weightsSize);
63
64     for (uint32_t g = 0; g < GC; g++) {
65         for (uint32_t oc = 0; oc < OC / GC; oc++) {
66             for (uint32_t oh = 0; oh < OH; oh++) {
67                 for (uint32_t ow = 0; ow < OW; ow++) {
68                     size_t oidx = g * OC / GC * OH * OW
69                                   + oc * OH * OW + oh * OW + ow;
70                     dst_data[oidx] = bias_data[g * OC / GC + oc];
71
72                     for (size_t ic = 0; ic < IC / GC; ic++) {
73                         for (size_t kh = 0; kh < KH; kh++) {
74                             for (size_t kw = 0; kw < KW; kw++) {
75                                 int32_t iw = ow * prm.str_w - prm.pad_w + kw;
76                                 int32_t ih = oh * prm.str_h - prm.pad_h + kh;
77                                 if (iw < 0 || iw >= (int32_t)IW || ih < 0
78                                     || ih >= (int32_t)IH)
79                                     continue;
80                                 size_t iidx = g * IC / GC * IH * IW
81                                               + ic * IH * IW + ih * IW + iw;
82                                 size_t widx = g * OC / GC * IC / GC * KH * KW
83                                               + oc * IC / GC * KH * KW
84                                               + ic * KH * KW + kh * KW + kw;
85
86                                 dst_data[oidx] += src_data[iidx] * weights_data[widx];
87                             }
88                         }
89                     }
90
91                     if (dst_data[oidx] < 0)
92                         dst_data[oidx] *= negative_slope;
93                 }
94             }
95         }
96     }
97 }
98
99 class MKLDNNGraphDWConvFusingTests: public TestsCommon,
100                                     public WithParamInterface<dw_conv_fusing_test_params> {
101     std::string model_t = R"V0G0N(
102 <Net Name="Convolution_Only" version="2" precision="FP32" batch="1">
103     <layers>
104         <layer name="in1" type="Input" precision="FP32" id="0">
105             <output>
106                 <port id="0">
107                     <dim>_IN_</dim>
108                     <dim>_IC_</dim>
109                     <dim>_IH_</dim>
110                     <dim>_IW_</dim>
111                 </port>
112             </output>
113         </layer>
114         <layer name="conv1" id="1" type="Convolution" precision="FP32">
115             <convolution stride-x="_C1_SW_" stride-y="_C1_SH_"
116                          pad-x="_C1_PW_"    pad-y="_C1_PH_"
117                          kernel-x="_C1_KW_" kernel-y="_C1_KH_"
118                          output="_C1_OC_"   group="_C1_GC_"/>
119
120             <weights offset="0" size="_C1_S1_" />
121             <biases offset="_C1_S1_" size="_C1_S2_" />
122             <input>
123                 <port id="1">
124                     <dim>_IN_</dim>
125                     <dim>_IC_</dim>
126                     <dim>_IH_</dim>
127                     <dim>_IW_</dim>
128                 </port>
129             </input>
130             <output>
131                 <port id="2">
132                     <dim>_IN_</dim>
133                     <dim>_C1_OC_</dim>
134                     <dim>_C1_OH_</dim>
135                     <dim>_C1_OW_</dim>
136                 </port>
137             </output>
138         </layer>
139         <layer name="relu1" id="2" type="ReLU" precision="FP32">
140             <data negative_slope="0"/>
141             <input>
142                 <port id="3">
143                     <dim>_IN_</dim>
144                     <dim>_C1_OC_</dim>
145                     <dim>_C1_OH_</dim>
146                     <dim>_C1_OW_</dim>
147                 </port>
148             </input>
149             <output>
150                 <port id="4">
151                     <dim>_IN_</dim>
152                     <dim>_C1_OC_</dim>
153                     <dim>_C1_OH_</dim>
154                     <dim>_C1_OW_</dim>
155                 </port>
156             </output>
157         </layer>
158         <layer name="conv2" id="3" type="Convolution" precision="FP32">
159             <convolution stride-x="_C2_SW_" stride-y="_C2_SH_"
160                          pad-x="_C2_PW_"    pad-y="_C2_PH_"
161                          kernel-x="_C2_KW_" kernel-y="_C2_KH_"
162                          output="_C2_OC_"   group="_C2_GC_"/>
163
164             <weights offset="_C2_S0_" size="_C2_S1_" />
165             <biases offset="_C2_S2_" size="_C2_S3_" />
166             <input>
167                 <port id="5">
168                     <dim>_IN_</dim>
169                     <dim>_C1_OC_</dim>
170                     <dim>_C1_OH_</dim>
171                     <dim>_C1_OW_</dim>
172                 </port>
173             </input>
174             <output>
175                 <port id="6">
176                     <dim>_IN_</dim>
177                     <dim>_C2_OC_</dim>
178                     <dim>_C2_OH_</dim>
179                     <dim>_C2_OW_</dim>
180                 </port>
181             </output>
182         </layer>
183         <layer name="relu2" id="4" type="ReLU" precision="FP32">
184             <data negative_slope="0"/>
185             <input>
186                 <port id="7">
187                     <dim>_IN_</dim>
188                     <dim>_C2_OC_</dim>
189                     <dim>_C2_OH_</dim>
190                     <dim>_C2_OW_</dim>
191                 </port>
192             </input>
193             <output>
194                 <port id="8">
195                     <dim>_IN_</dim>
196                     <dim>_C2_OC_</dim>
197                     <dim>_C2_OH_</dim>
198                     <dim>_C2_OW_</dim>
199                 </port>
200             </output>
201         </layer>
202     </layers>
203     <edges>
204         <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
205         <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
206         <edge from-layer="2" from-port="4" to-layer="3" to-port="5"/>
207         <edge from-layer="3" from-port="6" to-layer="4" to-port="7"/>
208     </edges>
209 </Net>
210 )V0G0N";
211
212     std::string getModel(dw_conv_fusing_test_params p) {
213         std::string model = model_t;
214         REPLACE_WITH_NUM(model, "_IW_", p.in.w);
215         REPLACE_WITH_NUM(model, "_IH_", p.in.h);
216         REPLACE_WITH_NUM(model, "_IC_", p.in.c);
217         REPLACE_WITH_NUM(model, "_IN_", p.in.n);
218
219         REPLACE_WITH_NUM(model, "_C1_KW_", p.conv1.krn_w);
220         REPLACE_WITH_NUM(model, "_C1_KH_", p.conv1.krn_h);
221         REPLACE_WITH_NUM(model, "_C1_SW_", p.conv1.str_w);
222         REPLACE_WITH_NUM(model, "_C1_SH_", p.conv1.str_h);
223         REPLACE_WITH_NUM(model, "_C1_PW_", p.conv1.pad_w);
224         REPLACE_WITH_NUM(model, "_C1_PH_", p.conv1.pad_h);
225         REPLACE_WITH_NUM(model, "_C1_GC_", p.conv1.grp_c);
226         REPLACE_WITH_NUM(model, "_C1_OC_", p.conv1.out_c);
227         size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1;
228         size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1;
229         REPLACE_WITH_NUM(model, "_C1_OH_", c1_oh);
230         REPLACE_WITH_NUM(model, "_C1_OW_", c1_ow);
231
232         size_t conv1_w_data_size = (p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c) * sizeof(float);
233         size_t conv1_b_data_size = p.conv1.out_c * sizeof(float);
234         REPLACE_WITH_NUM(model, "_C1_S1_", conv1_w_data_size);
235         REPLACE_WITH_NUM(model, "_C1_S2_", conv1_b_data_size);
236
237         REPLACE_WITH_NUM(model, "_C2_KW_", p.conv2.krn_w);
238         REPLACE_WITH_NUM(model, "_C2_KH_", p.conv2.krn_h);
239         REPLACE_WITH_NUM(model, "_C2_SW_", p.conv2.str_w);
240         REPLACE_WITH_NUM(model, "_C2_SH_", p.conv2.str_h);
241         REPLACE_WITH_NUM(model, "_C2_PW_", p.conv2.pad_w);
242         REPLACE_WITH_NUM(model, "_C2_PH_", p.conv2.pad_h);
243         REPLACE_WITH_NUM(model, "_C2_GC_", p.conv2.grp_c);
244         REPLACE_WITH_NUM(model, "_C2_OC_", p.conv2.out_c);
245         REPLACE_WITH_NUM(model, "_C2_OH_", (c1_oh + 2 * p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1);
246         REPLACE_WITH_NUM(model, "_C2_OW_", (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1);
247
248         size_t conv2_w_data_size = (p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c) * sizeof(float);
249         size_t conv2_b_data_size = p.conv2.out_c * sizeof(float);
250         REPLACE_WITH_NUM(model, "_C2_S0_", conv1_w_data_size + conv1_b_data_size);
251         REPLACE_WITH_NUM(model, "_C2_S1_", conv2_w_data_size);
252         REPLACE_WITH_NUM(model, "_C2_S2_", conv1_w_data_size + conv1_b_data_size + conv2_w_data_size);
253         REPLACE_WITH_NUM(model, "_C2_S3_", conv2_b_data_size);
254         return model;
255     }
256
257 protected:
258     virtual void TearDown() {
259     }
260
261     virtual void SetUp() {
262         try {
263             TestsCommon::SetUp();
264             dw_conv_fusing_test_params p = ::testing::WithParamInterface<dw_conv_fusing_test_params>::GetParam();
265             std::string model = getModel(p);
266
267             InferenceEngine::CNNNetReader net_reader;
268             ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
269
270             size_t conv1_w_size = p.conv1.krn_w * p.conv1.krn_h * p.conv1.out_c * p.in.c / p.conv1.grp_c + p.conv1.out_c; // conv1 weights + biases
271             size_t conv2_w_size = p.conv2.krn_w * p.conv2.krn_h * p.conv2.out_c * p.conv1.out_c / p.conv2.grp_c + p.conv2.out_c; // conv2 weights + biases
272
273             InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {(conv1_w_size+conv2_w_size) * sizeof(float)});
274             weights->allocate();
275             fill_data((float *) weights->buffer(), weights->size() / sizeof(float), 1);
276             InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
277
278             net_reader.SetWeights(weights_ptr);
279
280             MKLDNNGraphTestClass graph;
281             graph.CreateGraph(net_reader.getNetwork());
282
283             InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w};
284
285             InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, InferenceEngine::NCHW, dims_src);
286             src->allocate();
287             fill_data(src->buffer(), src->size());
288
289             auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
290
291             if (srcPtr == nullptr)
292                 FAIL() << "Cannot cast blob to TBlob<float>.";
293
294             InferenceEngine::BlobMap srcs;
295             srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src));
296
297             InferenceEngine::OutputsDataMap out;
298             out = net_reader.getNetwork().getOutputsInfo();
299             InferenceEngine::BlobMap outputBlobs;
300
301             std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
302
303             InferenceEngine::TBlob<float>::Ptr output;
304             output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
305             output->allocate();
306             outputBlobs[item.first] = output;
307
308             graph.Infer(srcs, outputBlobs);
309
310             size_t c1_oh = (p.in.h + 2 * p.conv1.pad_h - p.conv1.krn_h) / p.conv1.str_h + 1;
311             size_t c1_ow = (p.in.w + 2 * p.conv1.pad_w - p.conv1.krn_w) / p.conv1.str_w + 1;
312             InferenceEngine::TBlob<float> conv1_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c1_ow, c1_oh, p.conv1.out_c, p.in.n}, InferenceEngine::NCHW));
313             conv1_dst_ref.allocate();
314
315             size_t c2_oh = (c1_oh + 2 * p.conv2.pad_h - p.conv2.krn_h) / p.conv2.str_h + 1;
316             size_t c2_ow = (c1_ow + 2 * p.conv2.pad_w - p.conv2.krn_w) / p.conv2.str_w + 1;
317             InferenceEngine::TBlob<float> conv2_dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c2_ow, c2_oh, p.conv2.out_c, p.in.n}, InferenceEngine::NCHW));
318             conv2_dst_ref.allocate();
319
320             ref_conv(*srcPtr, (const float *)weights->buffer(), conv1_w_size, conv1_dst_ref, p.conv1, 0.0f);
321             ref_conv(conv1_dst_ref, (const float *)weights->buffer() + conv1_w_size, conv2_w_size, conv2_dst_ref, p.conv2, 0.0f);
322
323
324             compare(*output, conv2_dst_ref);
325         } catch (const InferenceEngine::details::InferenceEngineException &e) {
326             FAIL() << e.what();
327         }
328     }
329 };
330
// The body is intentionally empty: all checks run in the fixture's SetUp().
TEST_P(MKLDNNGraphDWConvFusingTests, TestsDwConvFusing) {}

// Single case: 1x32x160x320 input, conv1 = 1x1/s1 regular convolution
// (32 -> 24 channels, no padding), conv2 = 3x3/s1 pad-1 convolution with
// grp_c == out_c == 24, i.e. the depthwise pattern the fusing targets.
INSTANTIATE_TEST_CASE_P(
        TestsDwConvFusing, MKLDNNGraphDWConvFusingTests,
        ::testing::Values(
                dw_conv_fusing_test_params{{1, 32, 160, 320}, {1, 1, 1, 1, 0, 0, 24, 1}, {3, 3, 1, 1, 1, 1, 24, 24}}
        ));