Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / tests / unit / engines / mkldnn / graph / structure / graph_conv_depthwise_fusing_test.cpp
1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
8
9 #include "test_graph.hpp"
10
11 #include "single_layer_common.hpp"
12 #include <mkldnn_plugin/mkldnn_extension_utils.h>
13 #include "tests_common.hpp"
14
15 using namespace ::testing;
16 using namespace std;
17 using namespace mkldnn;
18
// Hyper-parameters of the convolution layer under test.
// The member order is significant: the test table builds this struct with
// positional aggregate initialization.
struct conv_params {
    size_t krn_w;   // kernel size along x (IR attribute "kernel-x")
    size_t krn_h;   // kernel size along y (IR attribute "kernel-y")
    size_t str_w;   // stride along x (IR attribute "stride-x")
    size_t str_h;   // stride along y (IR attribute "stride-y")
    size_t pad_w;   // padding along x (IR attribute "pad-x")
    size_t pad_h;   // padding along y (IR attribute "pad-y")
    size_t out_c;   // number of output channels (IR attribute "output")
    size_t grp_c;   // number of convolution groups (IR attribute "group")
};
29
30 struct conv_depthwise_fusing_test_params {
31     struct {
32         size_t n;
33         size_t c;
34         size_t h;
35         size_t w;
36     } in;
37
38     conv_params conv;
39     algorithm depthwise_alg;
40     bool isBroadcast;
41 };
42
43 template <typename data_t>
44 void ref_conv_depthwise(const InferenceEngine::TBlob<data_t> &src, const data_t *weights,
45               InferenceEngine::TBlob<data_t> &dst, conv_depthwise_fusing_test_params& prm) {
46     size_t KW = prm.conv.krn_w;
47     size_t KH = prm.conv.krn_h;
48     size_t GC = prm.conv.grp_c;
49
50     size_t IC = src.dims()[1];
51     size_t IH = src.dims()[2];
52     size_t IW = src.dims()[3];
53
54     size_t OW = (IW + 2 * prm.conv.pad_w - prm.conv.krn_w) / prm.conv.str_w + 1;
55     size_t OH = (IH + 2 * prm.conv.pad_h - prm.conv.krn_h) / prm.conv.str_h + 1;
56     size_t OC = prm.conv.out_c;
57
58     const data_t *src_data = src.readOnly();
59     const data_t *weights_data = weights;
60     const data_t *bias_data = weights_data + KW * KH * OC * IC / GC;
61     data_t *dst_data = dst.data();
62
63     const data_t *d_weights_data = bias_data + OC;
64     const data_t *d_bias_data = (prm.isBroadcast) ? d_weights_data + 1 : d_weights_data + OC;
65
66     for (uint32_t g = 0; g < GC; g++) {
67         for (uint32_t oc = 0; oc < OC / GC; oc++) {
68             for (uint32_t oh = 0; oh < OH; oh++) {
69                 for (uint32_t ow = 0; ow < OW; ow++) {
70                     size_t bidx = g * OC / GC + oc;
71                     size_t oidx = g * OC / GC * OH * OW
72                                   + oc * OH * OW + oh * OW + ow;
73                     dst_data[oidx] = bias_data[bidx];
74
75                     for (size_t ic = 0; ic < IC / GC; ic++) {
76                         for (size_t kh = 0; kh < KH; kh++) {
77                             for (size_t kw = 0; kw < KW; kw++) {
78                                 int32_t iw = ow * prm.conv.str_w - prm.conv.pad_w + kw;
79                                 int32_t ih = oh * prm.conv.str_h - prm.conv.pad_h + kh;
80                                 if (iw < 0 || iw >= (int32_t)IW || ih < 0
81                                     || ih >= (int32_t)IH)
82                                     continue;
83                                 size_t iidx = g * IC / GC * IH * IW
84                                               + ic * IH * IW + ih * IW + iw;
85                                 size_t widx = g * OC / GC * IC / GC * KH * KW
86                                               + oc * IC / GC * KH * KW
87                                               + ic * KH * KW + kh * KW + kw;
88
89                                 dst_data[oidx] += src_data[iidx] * weights_data[widx];
90                             }
91                         }
92                     }
93
94
95                     switch(prm.depthwise_alg) {
96                         case depthwise_scale_shift:
97                             dst_data[oidx] = d_weights_data[prm.isBroadcast ? 0 : bidx] * dst_data[oidx] + d_bias_data[prm.isBroadcast ? 0 : bidx];
98                             break;
99                         case depthwise_prelu:
100                             dst_data[oidx] = dst_data[oidx] >= 0 ? dst_data[oidx] : d_weights_data[prm.isBroadcast ? 0 : bidx] * dst_data[oidx];
101                             break;
102                         default:
103                             assert("Unsupported depthwise algorithm");
104                     }
105                 }
106             }
107         }
108     }
109 }
110
111 class MKLDNNGraphConvDepthwiseFusingTests: public TestsCommon,
112                                     public WithParamInterface<conv_depthwise_fusing_test_params> {
113     std::string model_t = R"V0G0N(
114 <Net Name="Convolution_Only" version="2" precision="FP32" batch="1">
115     <layers>
116         <layer name="in1" type="Input" precision="FP32" id="0">
117             <output>
118                 <port id="0">
119                     <dim>_IN_</dim>
120                     <dim>_IC_</dim>
121                     <dim>_IH_</dim>
122                     <dim>_IW_</dim>
123                 </port>
124             </output>
125         </layer>
126         <layer name="conv" id="1" type="Convolution" precision="FP32">
127             <convolution stride-x="_C_SW_" stride-y="_C_SH_"
128                          pad-x="_C_PW_"    pad-y="_C_PH_"
129                          kernel-x="_C_KW_" kernel-y="_C_KH_"
130                          output="_C_OC_"   group="_C_GC_"/>
131
132             <weights offset="0" size="_C_S1_" />
133             <biases offset="_C_S1_" size="_C_S2_" />
134             <input>
135                 <port id="1">
136                     <dim>_IN_</dim>
137                     <dim>_IC_</dim>
138                     <dim>_IH_</dim>
139                     <dim>_IW_</dim>
140                 </port>
141             </input>
142             <output>
143                 <port id="2">
144                     <dim>_IN_</dim>
145                     <dim>_C_OC_</dim>
146                     <dim>_C_OH_</dim>
147                     <dim>_C_OW_</dim>
148                 </port>
149             </output>
150         </layer>
151         <layer name="depthwise" id="2" type="_LT_" precision="FP32">
152             <data _P_NAME_="_P_VAL_"  PrimitivesPriority="_IMPLS_"/>
153             <weights offset="_D_S0_" size="_D_S1_" />
154             <biases offset="_D_S2_" size="_D_S3_" />
155
156             <input>
157                 <port id="3">
158                     <dim>_IN_</dim>
159                     <dim>_C_OC_</dim>
160                     <dim>_C_OH_</dim>
161                     <dim>_C_OW_</dim>
162                 </port>
163             </input>
164             <output>
165                 <port id="4">
166                     <dim>_IN_</dim>
167                     <dim>_C_OC_</dim>
168                     <dim>_C_OH_</dim>
169                     <dim>_C_OW_</dim>
170                 </port>
171             </output>
172         </layer>
173     </layers>
174     <edges>
175         <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
176         <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
177     </edges>
178 </Net>
179 )V0G0N";
180
181     std::string getModel(conv_depthwise_fusing_test_params p) {
182         std::string model = model_t;
183         REPLACE_WITH_NUM(model, "_IW_", p.in.w);
184         REPLACE_WITH_NUM(model, "_IH_", p.in.h);
185         REPLACE_WITH_NUM(model, "_IC_", p.in.c);
186         REPLACE_WITH_NUM(model, "_IN_", p.in.n);
187
188         REPLACE_WITH_NUM(model, "_C_KW_", p.conv.krn_w);
189         REPLACE_WITH_NUM(model, "_C_KH_", p.conv.krn_h);
190         REPLACE_WITH_NUM(model, "_C_SW_", p.conv.str_w);
191         REPLACE_WITH_NUM(model, "_C_SH_", p.conv.str_h);
192         REPLACE_WITH_NUM(model, "_C_PW_", p.conv.pad_w);
193         REPLACE_WITH_NUM(model, "_C_PH_", p.conv.pad_h);
194         REPLACE_WITH_NUM(model, "_C_GC_", p.conv.grp_c);
195         REPLACE_WITH_NUM(model, "_C_OC_", p.conv.out_c);
196         size_t c_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1;
197         size_t c_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1;
198         REPLACE_WITH_NUM(model, "_C_OH_", c_oh);
199         REPLACE_WITH_NUM(model, "_C_OW_", c_ow);
200
201         size_t conv_w_data_size = (p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c) * sizeof(float);
202         size_t conv_b_data_size = p.conv.out_c * sizeof(float);
203         REPLACE_WITH_NUM(model, "_C_S1_", conv_w_data_size);
204         REPLACE_WITH_NUM(model, "_C_S2_", conv_b_data_size);
205
206         if (p.depthwise_alg == depthwise_scale_shift) {
207             REPLACE_WITH_STR(model, "_LT_", "ScaleShift");
208             REPLACE_WITH_STR(model, "_P_NAME_", "broadcast");
209             REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0);
210
211         }
212         else if (p.depthwise_alg == depthwise_prelu) {
213             REPLACE_WITH_STR(model, "_LT_", "PReLU");
214             REPLACE_WITH_STR(model, "_P_NAME_", "channel_shared");
215             REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0);
216         }
217
218         size_t array_size =  p.isBroadcast ? 1 : p.conv.out_c;
219         size_t depthwise_w_data_size = array_size * sizeof(float);
220         size_t depthwise_b_data_size = array_size * sizeof(float);
221         REPLACE_WITH_NUM(model, "_D_S0_", conv_w_data_size + conv_b_data_size);
222         REPLACE_WITH_NUM(model, "_D_S1_", depthwise_w_data_size);
223         REPLACE_WITH_NUM(model, "_D_S2_", conv_w_data_size + conv_b_data_size + depthwise_w_data_size);
224         REPLACE_WITH_NUM(model, "_D_S3_", depthwise_b_data_size);
225
226         return model;
227     }
228
229 protected:
230     virtual void TearDown() {
231     }
232
233     virtual void SetUp() {
234         try {
235             TestsCommon::SetUp();
236             conv_depthwise_fusing_test_params p = ::testing::WithParamInterface<conv_depthwise_fusing_test_params>::GetParam();
237             std::string model = getModel(p);
238
239             InferenceEngine::CNNNetReader net_reader;
240             ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
241
242             size_t conv_w_size = p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c + p.conv.out_c; // conv weights + biases
243
244             size_t array_size =  p.isBroadcast ? 1 : p.conv.out_c;
245             size_t depthwise_w_size = array_size + array_size; // depthwise weights + biases
246
247             InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {(conv_w_size+depthwise_w_size) * sizeof(float)});
248             weights->allocate();
249             fill_data_sine((float *) weights->buffer(), weights->size() / sizeof(float), 5, 10, 0.5);
250             InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
251
252             net_reader.SetWeights(weights_ptr);
253
254             MKLDNNGraphTestClass graph;
255             graph.CreateGraph(net_reader.getNetwork());
256
257             auto& nodes = graph.getNodes();
258             nodes = graph.getNodes();
259             if (p.in.c == 3) {
260                 ASSERT_EQ(nodes.size(), 3);
261                 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
262                 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
263                 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
264             } else {
265                 ASSERT_EQ(nodes.size(), 5);
266                 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
267                 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
268                 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
269                 ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
270                 ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
271             }
272
273             InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w};
274
275             InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, InferenceEngine::NCHW, dims_src);
276             src->allocate();
277             fill_data(src->buffer(), src->size());
278
279             auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
280
281             if (srcPtr == nullptr)
282                 FAIL() << "Cannot cast blob to TBlob<float>.";
283
284             InferenceEngine::BlobMap srcs;
285             srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src));
286
287             InferenceEngine::OutputsDataMap out;
288             out = net_reader.getNetwork().getOutputsInfo();
289             InferenceEngine::BlobMap outputBlobs;
290
291             std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
292
293             InferenceEngine::TBlob<float>::Ptr output;
294             output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
295             output->allocate();
296             outputBlobs[item.first] = output;
297
298             graph.Infer(srcs, outputBlobs);
299
300             size_t c1_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1;
301             size_t c1_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1;
302             InferenceEngine::TBlob<float> dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c1_ow, c1_oh, p.conv.out_c, p.in.n}, InferenceEngine::NCHW));
303             dst_ref.allocate();
304
305             ref_conv_depthwise(*srcPtr, (const float *)weights->buffer(), dst_ref, p);
306
307             compare(*output, dst_ref);
308         } catch (const InferenceEngine::details::InferenceEngineException &e) {
309             FAIL() << e.what();
310         }
311     }
312 };
313
314 TEST_P(MKLDNNGraphConvDepthwiseFusingTests, TestsConvDepthwiseFusing) {}
315
316 INSTANTIATE_TEST_CASE_P(
317         TestsConvDepthwiseFusing, MKLDNNGraphConvDepthwiseFusingTests,
318         ::testing::Values(
319                 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, false},
320                 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, false},
321                 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, true},
322                 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, true},
323                 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, false},
324                 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, false},
325                 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, true},
326                 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, true},
327                 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, false},
328                 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, false},
329                 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, true},
330                 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, true},
331                 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, false},
332                 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, false},
333                 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, true},
334                 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, true}
335         ));