1 // Copyright (C) 2018-2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include <gtest/gtest.h>
6 #include <gmock/gmock-spec-builders.h>
7 #include "mkldnn_plugin/mkldnn_graph.h"
9 #include "test_graph.hpp"
11 #include "single_layer_common.hpp"
12 #include <mkldnn_plugin/mkldnn_extension_utils.h>
13 #include "tests_common.hpp"
15 using namespace ::testing;
17 using namespace mkldnn;
// Parameter bundle describing one convolution + depthwise fusing test case.
// Only the depthwise_alg member is visible in this excerpt; other code in this file
// also reads `in`, `conv` and `isBroadcast` members of this struct.
30 struct conv_depthwise_fusing_test_params {
// Depthwise post-op applied after the convolution; this file uses
// depthwise_scale_shift and depthwise_prelu (presumably mkldnn::algorithm values — confirm).
39 algorithm depthwise_alg;
// Naive reference implementation of a grouped 2D convolution followed by a depthwise
// post-op applied to every output element. Used to produce the expected result.
//
// src     - input blob; dims()[1], dims()[2], dims()[3] are read as channels, height, width.
// weights - flat buffer consumed in this order: convolution weights
//           (KW * KH * OC * IC / GC values), OC convolution biases, depthwise weights,
//           depthwise biases. The depthwise weights occupy 1 value when prm.isBroadcast
//           is set and OC values otherwise.
// dst     - output blob, written through dst.data() and indexed as [channel][oh][ow]
//           (no batch index is used in the offset computation).
// prm     - test parameters: kernel, stride, padding, group and output-channel settings,
//           the depthwise algorithm selector and the broadcast flag.
43 template <typename data_t>
44 void ref_conv_depthwise(const InferenceEngine::TBlob<data_t> &src, const data_t *weights,
45 InferenceEngine::TBlob<data_t> &dst, conv_depthwise_fusing_test_params& prm) {
46 size_t KW = prm.conv.krn_w;
47 size_t KH = prm.conv.krn_h;
48 size_t GC = prm.conv.grp_c;
50 size_t IC = src.dims()[1];
51 size_t IH = src.dims()[2];
52 size_t IW = src.dims()[3];
// Output spatial size: (in + 2 * pad - kernel) / stride + 1, using integer division.
54 size_t OW = (IW + 2 * prm.conv.pad_w - prm.conv.krn_w) / prm.conv.str_w + 1;
55 size_t OH = (IH + 2 * prm.conv.pad_h - prm.conv.krn_h) / prm.conv.str_h + 1;
56 size_t OC = prm.conv.out_c;
58 const data_t *src_data = src.readOnly();
59 const data_t *weights_data = weights;
// Convolution biases are stored right after the convolution weights.
60 const data_t *bias_data = weights_data + KW * KH * OC * IC / GC;
61 data_t *dst_data = dst.data();
// Depthwise weights follow the convolution biases; depthwise biases follow the
// depthwise weights (1 value when broadcast, OC values otherwise).
63 const data_t *d_weights_data = bias_data + OC;
64 const data_t *d_bias_data = (prm.isBroadcast) ? d_weights_data + 1 : d_weights_data + OC;
// Iterate over groups, output channels within a group, and output positions.
66 for (uint32_t g = 0; g < GC; g++) {
67 for (uint32_t oc = 0; oc < OC / GC; oc++) {
68 for (uint32_t oh = 0; oh < OH; oh++) {
69 for (uint32_t ow = 0; ow < OW; ow++) {
// bidx: absolute output-channel index; oidx: flat offset of the output element.
70 size_t bidx = g * OC / GC + oc;
71 size_t oidx = g * OC / GC * OH * OW
72 + oc * OH * OW + oh * OW + ow;
// Start the accumulator from the convolution bias of this output channel.
73 dst_data[oidx] = bias_data[bidx];
// Accumulate over the input channels of the group and over the kernel window.
75 for (size_t ic = 0; ic < IC / GC; ic++) {
76 for (size_t kh = 0; kh < KH; kh++) {
77 for (size_t kw = 0; kw < KW; kw++) {
// Input coordinates for this output position and kernel tap; they can fall
// outside the input when padding is non-zero.
78 int32_t iw = ow * prm.conv.str_w - prm.conv.pad_w + kw;
79 int32_t ih = oh * prm.conv.str_h - prm.conv.pad_h + kh;
// Bounds check on the input coordinates (the rest of this condition and its
// body are not visible in this excerpt).
80 if (iw < 0 || iw >= (int32_t)IW || ih < 0
// Flat offsets into the input and into the convolution weights.
83 size_t iidx = g * IC / GC * IH * IW
84 + ic * IH * IW + ih * IW + iw;
85 size_t widx = g * OC / GC * IC / GC * KH * KW
86 + oc * IC / GC * KH * KW
87 + ic * KH * KW + kh * KW + kw;
89 dst_data[oidx] += src_data[iidx] * weights_data[widx];
// Apply the depthwise post-op selected by prm.depthwise_alg to the accumulated value.
95 switch(prm.depthwise_alg) {
96 case depthwise_scale_shift:
// Scale-shift: out = weight * out + bias; index 0 when broadcast, bidx otherwise.
97 dst_data[oidx] = d_weights_data[prm.isBroadcast ? 0 : bidx] * dst_data[oidx] + d_bias_data[prm.isBroadcast ? 0 : bidx];
// Non-negative values pass through; negative values are multiplied by the depthwise
// weight (presumably the depthwise_prelu case — its label is not visible in this excerpt).
100 dst_data[oidx] = dst_data[oidx] >= 0 ? dst_data[oidx] : d_weights_data[prm.isBroadcast ? 0 : bidx] * dst_data[oidx];
// NOTE(review): the assert argument is a string literal, which always converts to true,
// so this assert can never fire. If the intent is to trap unsupported algorithms,
// something like assert(!"...") or assert(false && "...") is needed — confirm and fix.
103 assert("Unsupported depthwise algorithm");
// Value-parameterized test fixture for the convolution + depthwise fusing tests.
// Derives from TestsCommon and from WithParamInterface over conv_depthwise_fusing_test_params.
111 class MKLDNNGraphConvDepthwiseFusingTests: public TestsCommon,
112 public WithParamInterface<conv_depthwise_fusing_test_params> {
// XML template of the tested network: an Input layer ("in1"), a Convolution layer ("conv")
// and a layer named "depthwise" whose type is the _LT_ placeholder. Tokens of the form
// _NAME_ are placeholders that getModel() substitutes with concrete values.
113 std::string model_t = R"V0G0N(
114 <Net Name="Convolution_Only" version="2" precision="FP32" batch="1">
116 <layer name="in1" type="Input" precision="FP32" id="0">
126 <layer name="conv" id="1" type="Convolution" precision="FP32">
127 <convolution stride-x="_C_SW_" stride-y="_C_SH_"
128 pad-x="_C_PW_" pad-y="_C_PH_"
129 kernel-x="_C_KW_" kernel-y="_C_KH_"
130 output="_C_OC_" group="_C_GC_"/>
132 <weights offset="0" size="_C_S1_" />
133 <biases offset="_C_S1_" size="_C_S2_" />
151 <layer name="depthwise" id="2" type="_LT_" precision="FP32">
152 <data _P_NAME_="_P_VAL_" PrimitivesPriority="_IMPLS_"/>
153 <weights offset="_D_S0_" size="_D_S1_" />
154 <biases offset="_D_S2_" size="_D_S3_" />
175 <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
176 <edge from-layer="1" from-port="2" to-layer="2" to-port="3"/>
// Builds the network XML for test case `p` by substituting placeholders in a copy of model_t.
// Declared to return std::string; presumably returns the filled-in model (the return
// statement is not visible in this excerpt).
181 std::string getModel(conv_depthwise_fusing_test_params p) {
182 std::string model = model_t;
// Input tensor shape.
183 REPLACE_WITH_NUM(model, "_IW_", p.in.w);
184 REPLACE_WITH_NUM(model, "_IH_", p.in.h);
185 REPLACE_WITH_NUM(model, "_IC_", p.in.c);
186 REPLACE_WITH_NUM(model, "_IN_", p.in.n);
// Convolution attributes: kernel, stride, padding, group count and output channels.
188 REPLACE_WITH_NUM(model, "_C_KW_", p.conv.krn_w);
189 REPLACE_WITH_NUM(model, "_C_KH_", p.conv.krn_h);
190 REPLACE_WITH_NUM(model, "_C_SW_", p.conv.str_w);
191 REPLACE_WITH_NUM(model, "_C_SH_", p.conv.str_h);
192 REPLACE_WITH_NUM(model, "_C_PW_", p.conv.pad_w);
193 REPLACE_WITH_NUM(model, "_C_PH_", p.conv.pad_h);
194 REPLACE_WITH_NUM(model, "_C_GC_", p.conv.grp_c);
195 REPLACE_WITH_NUM(model, "_C_OC_", p.conv.out_c);
// Convolution output spatial size: (in + 2 * pad - kernel) / stride + 1.
196 size_t c_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1;
197 size_t c_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1;
198 REPLACE_WITH_NUM(model, "_C_OH_", c_oh);
199 REPLACE_WITH_NUM(model, "_C_OW_", c_ow);
// Byte sizes of the convolution weights and biases (element size is sizeof(float)).
// In the template the weights start at offset 0 and the biases at offset _C_S1_.
201 size_t conv_w_data_size = (p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c) * sizeof(float);
202 size_t conv_b_data_size = p.conv.out_c * sizeof(float);
203 REPLACE_WITH_NUM(model, "_C_S1_", conv_w_data_size);
204 REPLACE_WITH_NUM(model, "_C_S2_", conv_b_data_size);
// Select the depthwise layer type and the name of its broadcast-style attribute;
// the attribute value is 1 when p.isBroadcast is set and 0 otherwise.
206 if (p.depthwise_alg == depthwise_scale_shift) {
207 REPLACE_WITH_STR(model, "_LT_", "ScaleShift");
208 REPLACE_WITH_STR(model, "_P_NAME_", "broadcast");
209 REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0);
212 else if (p.depthwise_alg == depthwise_prelu) {
213 REPLACE_WITH_STR(model, "_LT_", "PReLU");
214 REPLACE_WITH_STR(model, "_P_NAME_", "channel_shared");
215 REPLACE_WITH_NUM(model, "_P_VAL_", p.isBroadcast ? 1 : 0);
// Depthwise weights and biases each hold one value when broadcast, otherwise one per
// output channel.
218 size_t array_size = p.isBroadcast ? 1 : p.conv.out_c;
219 size_t depthwise_w_data_size = array_size * sizeof(float);
220 size_t depthwise_b_data_size = array_size * sizeof(float);
// Depthwise blobs are placed after the convolution weights and biases:
// _D_S0_/_D_S1_ are the weights offset/size, _D_S2_/_D_S3_ the biases offset/size.
221 REPLACE_WITH_NUM(model, "_D_S0_", conv_w_data_size + conv_b_data_size);
222 REPLACE_WITH_NUM(model, "_D_S1_", depthwise_w_data_size);
223 REPLACE_WITH_NUM(model, "_D_S2_", conv_w_data_size + conv_b_data_size + depthwise_w_data_size);
224 REPLACE_WITH_NUM(model, "_D_S3_", depthwise_b_data_size);
// Per-test teardown hook of the fixture (presumably overrides the TestsCommon/gtest
// TearDown — confirm; the body is not visible in this excerpt).
230 virtual void TearDown() {
// Runs the whole test scenario for the current parameter set: builds the network,
// checks the structure of the created graph, runs inference and compares the output
// with the result of ref_conv_depthwise().
233 virtual void SetUp() {
// Run the base fixture setup first.
235 TestsCommon::SetUp();
236 conv_depthwise_fusing_test_params p = ::testing::WithParamInterface<conv_depthwise_fusing_test_params>::GetParam();
237 std::string model = getModel(p);
// Parse the generated network description; parsing must not throw.
239 InferenceEngine::CNNNetReader net_reader;
240 ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
// Element counts (in floats) of the convolution and depthwise parameter arrays.
242 size_t conv_w_size = p.conv.krn_w * p.conv.krn_h * p.conv.out_c * p.in.c / p.conv.grp_c + p.conv.out_c; // conv weights + biases
244 size_t array_size = p.isBroadcast ? 1 : p.conv.out_c;
245 size_t depthwise_w_size = array_size + array_size; // depthwise weights + biases
// One U8 blob sized to hold all convolution and depthwise parameters as floats.
// The raw pointer is wrapped into weights_ptr below and is also used later to feed
// the reference implementation.
247 InferenceEngine::TBlob<uint8_t> *weights = new InferenceEngine::TBlob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {(conv_w_size+depthwise_w_size) * sizeof(float)});
// Fill the buffer, viewed as floats, using the fill_data_sine helper (defined elsewhere).
249 fill_data_sine((float *) weights->buffer(), weights->size() / sizeof(float), 5, 10, 0.5);
250 InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
252 net_reader.SetWeights(weights_ptr);
// Build the MKLDNN graph from the parsed network.
254 MKLDNNGraphTestClass graph;
255 graph.CreateGraph(net_reader.getNetwork());
// NOTE(review): nodes is bound to graph.getNodes() and then assigned from the same call
// on the next line; the second statement looks redundant — confirm and remove.
257 auto& nodes = graph.getNodes();
258 nodes = graph.getNodes();
// The two assertion groups below describe alternative graph shapes; the branching between
// them is not visible in this excerpt.
// Shape 1: Input -> Convolution_Depthwise -> Output.
260 ASSERT_EQ(nodes.size(), 3);
261 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
262 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
263 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Output);
// Shape 2: Input -> Reorder -> Convolution_Depthwise -> Reorder -> Output.
265 ASSERT_EQ(nodes.size(), 5);
266 ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
267 ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Reorder);
268 ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Convolution_Depthwise);
269 ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reorder);
270 ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
// Create the FP32 NCHW input blob and fill it with the fill_data helper.
273 InferenceEngine::SizeVector dims_src = {p.in.n, p.in.c, p.in.h, p.in.w};
275 InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, InferenceEngine::NCHW, dims_src);
277 fill_data(src->buffer(), src->size());
// The reference implementation takes a TBlob<float>, so obtain the typed view of the input.
279 auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
281 if (srcPtr == nullptr)
282 FAIL() << "Cannot cast blob to TBlob<float>.";
// Bind the input blob to the network input named "in1".
284 InferenceEngine::BlobMap srcs;
285 srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src));
// Create the output blob from the tensor descriptor of the first network output.
287 InferenceEngine::OutputsDataMap out;
288 out = net_reader.getNetwork().getOutputsInfo();
289 InferenceEngine::BlobMap outputBlobs;
291 std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
293 InferenceEngine::TBlob<float>::Ptr output;
294 output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
296 outputBlobs[item.first] = output;
// Run inference on the graph.
298 graph.Infer(srcs, outputBlobs);
// Blob for the reference result, sized from the convolution output shape.
// NOTE(review): the dimension list is given as {ow, oh, out_c, n} while the layout is NCHW —
// confirm this order matches what TensorDesc expects.
300 size_t c1_oh = (p.in.h + 2 * p.conv.pad_h - p.conv.krn_h) / p.conv.str_h + 1;
301 size_t c1_ow = (p.in.w + 2 * p.conv.pad_w - p.conv.krn_w) / p.conv.str_w + 1;
302 InferenceEngine::TBlob<float> dst_ref(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {c1_ow, c1_oh, p.conv.out_c, p.in.n}, InferenceEngine::NCHW));
// Compute the expected output and compare it with the output produced by the graph.
305 ref_conv_depthwise(*srcPtr, (const float *)weights->buffer(), dst_ref, p);
307 compare(*output, dst_ref);
// Handler for Inference Engine exceptions (the matching try and the handler body are not
// visible in this excerpt).
308 } catch (const InferenceEngine::details::InferenceEngineException &e) {
// The test body is intentionally empty: the scenario and its assertions live in the
// fixture's SetUp().
314 TEST_P(MKLDNNGraphConvDepthwiseFusingTests, TestsConvDepthwiseFusing) {}
// Instantiates the parameterized test with the parameter sets listed below.
// Each entry is: {input shape}, {convolution parameters}, depthwise algorithm, and a bool
// (presumably isBroadcast — confirm against the struct declaration).
// The input shape is presumably {n, c, h, w} and the convolution parameters presumably
// {krn_w, krn_h, str_w, str_h, pad_w, pad_h, out_c, grp_c} — the member order is not
// visible in this excerpt; confirm before editing values.
// Every shape/convolution combination is repeated for both depthwise algorithms and both
// values of the bool flag.
316 INSTANTIATE_TEST_CASE_P(
317 TestsConvDepthwiseFusing, MKLDNNGraphConvDepthwiseFusingTests,
319 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, false},
320 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, false},
321 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_scale_shift, true},
322 conv_depthwise_fusing_test_params{{1, 64, 5, 5}, {1, 1, 1, 1, 0, 0, 48, 1}, depthwise_prelu, true},
323 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, false},
324 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, false},
325 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_scale_shift, true},
326 conv_depthwise_fusing_test_params{{1, 48, 9, 9}, {3, 3, 1, 1, 1, 1, 64, 1}, depthwise_prelu, true},
327 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, false},
328 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, false},
329 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_scale_shift, true},
330 conv_depthwise_fusing_test_params{{1, 48, 11, 11}, {3, 3, 1, 1, 1, 1, 48, 48}, depthwise_prelu, true},
331 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, false},
332 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, false},
333 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_scale_shift, true},
334 conv_depthwise_fusing_test_params{{1, 3, 11, 11}, {3, 3, 1, 1, 1, 1, 3, 3}, depthwise_prelu, true}