From 33754904f9b0873661c37947442c44e36fab0cab Mon Sep 17 00:00:00 2001
From: Cheongyo Bahk/On-Device Lab(SR)/Engineer/Samsung Electronics
Date: Thu, 18 Jul 2019 14:03:16 +0900
Subject: [PATCH] [locomotiv] Support DepthwiseConv2D (#4338)

This commit supports node execution for DepthwiseConv2D. It also includes
a related test for the valid padding case.

Signed-off-by: Cheongyo Bahk
---
 contrib/locomotiv/src/Node.lst                   |   1 +
 contrib/locomotiv/src/Node/DepthwiseConv2D.cpp   | 185 +++++++++++++++++++++
 .../locomotiv/src/Node/DepthwiseConv2D.test.cpp  | 164 ++++++++++++++++++
 3 files changed, 350 insertions(+)
 create mode 100644 contrib/locomotiv/src/Node/DepthwiseConv2D.cpp
 create mode 100644 contrib/locomotiv/src/Node/DepthwiseConv2D.test.cpp

diff --git a/contrib/locomotiv/src/Node.lst b/contrib/locomotiv/src/Node.lst
index efa8273..e4102f0 100644
--- a/contrib/locomotiv/src/Node.lst
+++ b/contrib/locomotiv/src/Node.lst
@@ -9,6 +9,7 @@ NODE(BiasAdd)
 NODE(BiasEncode)
 NODE(ConstGen)
 NODE(Conv2D)
+NODE(DepthwiseConv2D)
 NODE(FeatureDecode)
 NODE(FeatureEncode)
 NODE(FilterEncode)
diff --git a/contrib/locomotiv/src/Node/DepthwiseConv2D.cpp b/contrib/locomotiv/src/Node/DepthwiseConv2D.cpp
new file mode 100644
index 0000000..706a826
--- /dev/null
+++ b/contrib/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+#include "Validation.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+/**
+ * @brief Compute 1D output size based on given 1D arguments.
+ *
+ * @param whole_pad Sum of front and back pad
+ */
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
+                                 uint32_t stride)
+{
+  assert((image_size + whole_pad - filter_size) % stride == 0);
+  return (image_size + whole_pad - filter_size) / stride + 1;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculates DepthwiseConv2D
+ * @note  ifm_buf has NHWC and ker_buf HWCM format
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffer<IFM_T> *ifm_buf,
+                             const Buffer<FIL_T> *ker_buf)
+{
+  auto ifm_shape = ifm_buf->shape();
+  auto ker_shape = ker_buf->shape();
+
+  locomotiv::validate(ifm_shape.rank() == 4, "ifm rank must be 4");
+  locomotiv::validate(ker_shape.rank() == 4, "depthwise filter rank must be 4");
+  locomotiv::validate(ifm_shape.dim(3 /* of NHWC */) == ker_shape.dim(2 /* of HWCM */),
+                      "channel value mismatch"); // should have same channel values
+
+  const uint32_t ifm_height = ifm_shape.dim(1);
+  const uint32_t ifm_width = ifm_shape.dim(2);
+
+  const uint32_t ker_height = ker_shape.dim(0);
+  const uint32_t ker_width = ker_shape.dim(1);
+
+  const uint32_t stride_width = dw_conv2d->stride()->horizontal();
+  const uint32_t stride_height = dw_conv2d->stride()->vertical();
+
+  // TODO Enable dilations. Let's set these to 1 for now.
+  const uint32_t dilation_width_factor = 1;
+  const uint32_t dilation_height_factor = 1;
+
+  const uint32_t pad_top = dw_conv2d->pad()->top();
+  const uint32_t pad_bottom = dw_conv2d->pad()->bottom();
+
+  const uint32_t pad_left = dw_conv2d->pad()->left();
+  const uint32_t pad_right = dw_conv2d->pad()->right();
+
+  const uint32_t ofm_height =
+      compute_out_size(ifm_height, pad_top + pad_bottom, ker_height, stride_height);
+  const uint32_t ofm_width =
+      compute_out_size(ifm_width, pad_left + pad_right, ker_width, stride_width);
+
+  const uint32_t batches = ifm_shape.dim(0);
+  const uint32_t ifm_depth = ifm_shape.dim(3);
+  const uint32_t multiplier = ker_shape.dim(3);
+  const uint32_t ofm_depth = ifm_depth * multiplier;
+
+  Shape ofm_shape{batches, ofm_height, ofm_width, ofm_depth};
+  auto ofm_buf = make_buffer<RET_T, LexicalLayout>(ofm_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t ofm_y = 0; ofm_y < ofm_height; ++ofm_y)
+    {
+      for (uint32_t ofm_x = 0; ofm_x < ofm_width; ++ofm_x)
+      {
+        for (uint32_t ch = 0; ch < ifm_depth; ++ch)
+        {
+          for (uint32_t nth = 0; nth < multiplier; nth++)
+          {
+            const int in_x_origin = (ofm_x * stride_width) - pad_left;
+            const int in_y_origin = (ofm_y * stride_height) - pad_top;
+            float total = 0.f;
+            for (uint32_t ker_y = 0; ker_y < ker_height; ++ker_y)
+            {
+              for (uint32_t ker_x = 0; ker_x < ker_width; ++ker_x)
+              {
+                const int in_x = in_x_origin + dilation_width_factor * ker_x;
+                const int in_y = in_y_origin + dilation_height_factor * ker_y;
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && ((unsigned)in_x < ifm_width) && (in_y >= 0) &&
+                    ((unsigned)in_y < ifm_height))
+                {
+                  auto ifm_value = ifm_buf->at(Index({batch, (unsigned)in_y, (unsigned)in_x, ch}));
+                  auto ker_value = ker_buf->at(Index({ker_y, ker_x, ch, nth}));
+                  total += (ifm_value * ker_value);
+                }
+              }
+            }
+            uint32_t ofm_channel = ch * multiplier + nth;
+            ofm_buf.at(Index({batch, ofm_y, ofm_x, ofm_channel})) = total;
+          }
+        }
+      }
+    }
+  }
+  return ofm_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+{
+  auto ifm_data = annot_data(dw_conv2d->ifm());
+  auto ker_data = annot_data(dw_conv2d->ker());
+
+  validate(ifm_data, "Can't find input data of DepthwiseConv2D");
+  validate(ifm_data->shape()->rank() == 4, "ifm rank must be 4");
+
+  validate(ker_data, "Can't find kernel data of DepthwiseConv2D");
+  validate(ker_data->shape()->rank() == 4, "Kernel rank must be 4");
+
+  validate(annot_domain(dw_conv2d->ifm()) == loco::Domain::Feature,
+           "IFM of DepthwiseConv2D is not feature");
+  validate(annot_domain(dw_conv2d->ker()) == loco::Domain::DepthwiseFilter,
+           "Kernel of DepthwiseConv2D is not depthwise filter");
+
+  std::unique_ptr<NodeData> dw_conv2d_result = nullptr;
+
+  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+  {
+    auto ifm_buf = ifm_data->as_f32_bufptr();
+    auto ker_buf = ker_data->as_f32_bufptr();
+
+    auto dw_conv2d_buf = calc_dw_conv2d<float, float, float>(dw_conv2d, ifm_buf, ker_buf);
+
+    dw_conv2d_result = make_data(dw_conv2d_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(dw_conv2d_result != nullptr);
+
+  erase_annot_data(dw_conv2d);
+  annot_data(dw_conv2d, std::move(dw_conv2d_result));
+  annot_domain(dw_conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/contrib/locomotiv/src/Node/DepthwiseConv2D.test.cpp b/contrib/locomotiv/src/Node/DepthwiseConv2D.test.cpp
new file mode 100644
index 0000000..48824c2
--- /dev/null
+++ b/contrib/locomotiv/src/Node/DepthwiseConv2D.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h, const uint32_t pad_top = 0, const uint32_t pad_bottom = 0,
+              const uint32_t pad_left = 0, const uint32_t pad_right = 0)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of DepthwiseFilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::DepthwiseFilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::DepthwiseFilter);
+  }
+
+  // build DepthwiseConv2D
+  auto dw_conv2d = g->nodes()->create<loco::DepthwiseConv2D>();
+  dw_conv2d->ifm(ifm_enc);
+  dw_conv2d->ker(ker_enc);
+  dw_conv2d->stride()->vertical(stride_v);
+  dw_conv2d->stride()->horizontal(stride_h);
+  dw_conv2d->pad()->top(pad_top);
+  dw_conv2d->pad()->bottom(pad_bottom);
+  dw_conv2d->pad()->left(pad_left);
+  dw_conv2d->pad()->right(pad_right);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(dw_conv2d);
+
+  // get result of calculation
+  auto dw_conv2d_result = locomotiv::annot_data(dw_conv2d);
+
+  // check the result
+  ASSERT_NE(dw_conv2d_result, nullptr);
+  ASSERT_TRUE(dw_conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(dw_conv2d_result->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(dw_conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(dw_conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+
+/* ifm, ker and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 2], stddev=1.1)
+ker = tf.random_normal([4, 4, 2, 3], stddev=1.1)
+out = tf.nn.depthwise_conv2d(ifm, ker, strides = [1, 1, 1, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+    print(sess.run(out))
+*/
+TEST(NodeExecution_DepthwiseConv2D, f32_random_valid)
+{
+  using nncc::core::ADT::tensor::Shape;
+
+  const float ifm[] = {0.8122538, 1.209147, 0.6903842, -0.26646265, 1.516799, -1.8540707,
+                       -0.74240327, 1.7811562, -0.03699546, -0.44468504, -1.4982721,
+                       -1.1858582, -0.21140318, -0.974522, 1.0000849, -1.294535, -0.6108882,
+                       0.25827602, 1.3631831, -0.5180266, 0.20870179, 0.18333802, -0.42263857,
+                       -1.6694735, 0.0415236, -0.3903758, 2.0933757, -0.29660916, 2.1218338,
+                       -1.1599928, 0.57163256, 0.48865932, -1.3622656, 0.35924262, 1.2951899,
+                       -0.1769997, 0.74513537, -0.31920406, -1.2902768, -0.7095059, 1.9157801,
+                       -0.41028237, 1.2502829, 0.3354887, 1.4199319, -0.20366786, -0.8828556,
+                       0.5173567, 1.7708117, -0.30096334};
+  const float ker[] = {
+      -0.19805557, 0.58464956, -0.7804337, 0.06974592, 0.45790604, 0.24833807, 0.43393376,
+      0.2541043, -0.04406675, -0.32167575, 1.0546446, -1.4978354, 0.20829494, 1.1659569,
+      0.37908667, -0.94137955, 0.293349, -1.1023049, 0.76133233, 0.55595005, 1.4458209,
+      1.6128604, 1.5655615, -2.183877, -0.90535915, -0.49858555, 1.7168728, -1.1590382,
+      0.6706056, 1.2215618, -0.06603386, 0.16559464, 0.541991, -0.44488335, 0.766181,
+      1.0227629, -0.6352362, -1.670828, -0.63334507, 0.0313305, -0.6721083, 0.50112915,
+      -0.15218066, 0.67222077, -0.3613627, -0.08516614, -0.5024078, -0.9503976, -2.1892295,
+      1.8308185, -0.15187284, 1.5761136, 0.24869336, -1.7378871, -0.22518761, 1.0175673,
+      0.7084485, -0.74157554, -1.8185995, -1.3330095, -0.04427439, 1.0556892, -0.68243974,
+      0.32001218, 2.0901792, -1.1612813, 0.7294674, 0.05740008, -0.00832882, 1.0446658,
+      0.4477195, -0.09174404, -1.0176039, 1.5066665, -2.148343, 0.29421416, 0.93011874,
+      -0.15737922, -1.6444012, 0.25780794, -0.6545867, -0.3488956, 0.26167992, -0.154414,
+      0.2798124, -0.8590068, 2.0494444, 0.48268002, 0.81941164, -0.4848027, 0.76870304,
+      0.7102261, 0.45778143, 0.23214905, -0.17742023, -0.75016516};
+  const float ofm[] = {4.474646, 0.6792067, -1.9799856, 7.484751, 4.3087378, -1.905938,
+                       1.4887369, 0.4361322, 0.79539883, -3.8583446, -4.502204, 4.356392,
+                       -5.3030324, 3.493003, -4.349277, 2.3069482, -3.8881323, -0.73901534,
+                       -0.6629516, 2.1247253, -4.9229584, 1.6716996, -3.0208125, 1.0597891};
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 2}, Shape{4, 4, 2, 3}, Shape{1, 2, 2, 6}, // shapes of input, ker, output
+           1, 1 // stride
+  );
+}
+
+// TODO Add same padding test
+
+// clang-format on
-- 
2.7.4
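
Reviewer note (not part of the patch): below is a minimal, self-contained sketch of how the
expected OFM shape Shape{1, 2, 2, 6} in the f32_random_valid test follows from the
compute_out_size() formula and the HWCM channel layout used by calc_dw_conv2d() above.
compute_out_size() is copied from DepthwiseConv2D.cpp; the main() wrapper and the printed
strings are illustrative assumptions only, not locomotiv code.

#include <cassert>
#include <cstdint>
#include <iostream>

// Same formula as compute_out_size() in DepthwiseConv2D.cpp.
inline uint32_t compute_out_size(uint32_t image_size, uint32_t whole_pad, uint32_t filter_size,
                                 uint32_t stride)
{
  assert((image_size + whole_pad - filter_size) % stride == 0);
  return (image_size + whole_pad - filter_size) / stride + 1;
}

int main()
{
  // f32_random_valid: ifm 1x5x5x2 (NHWC), ker 4x4x2x3 (HWCM), stride 1x1, VALID (no padding)
  const uint32_t ofm_height = compute_out_size(5, /* whole_pad */ 0, 4, 1); // (5 + 0 - 4) / 1 + 1 = 2
  const uint32_t ofm_width = compute_out_size(5, /* whole_pad */ 0, 4, 1);  // 2
  const uint32_t ifm_depth = 2;
  const uint32_t multiplier = 3;
  const uint32_t ofm_depth = ifm_depth * multiplier; // 2 * 3 = 6

  std::cout << "OFM shape: 1x" << ofm_height << "x" << ofm_width << "x" << ofm_depth << std::endl;

  // Each input channel 'ch' produces 'multiplier' adjacent output channels,
  // i.e. ofm_channel = ch * multiplier + nth, matching the indexing in calc_dw_conv2d().
  for (uint32_t ch = 0; ch < ifm_depth; ++ch)
  {
    for (uint32_t nth = 0; nth < multiplier; ++nth)
    {
      std::cout << "ifm channel " << ch << " x filter " << nth << " -> ofm channel "
                << (ch * multiplier + nth) << std::endl;
    }
  }

  return 0;
}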