[locomotiv] Conv2D (padding is not yet added) (#3725)
author윤현식/On-Device Lab(SR)/Principal Engineer/삼성전자 <hyunsik.yoon@samsung.com>
Wed, 12 Jun 2019 05:58:56 +0000 (14:58 +0900)
committer박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>
Wed, 12 Jun 2019 05:58:56 +0000 (14:58 +0900)
* [locomotiv] Conv2D (padding is not yet added)

This adds Conv2D implementation of locomotiv. Padding is not implemented yet.

Signed-off-by: Hyun Sik Yoon <hyunsik.yoon@samsung.com>
* revised per comments

* Fixed type. Added TODO

* int to uint32_t

contrib/locomotiv/src/Node.lst
contrib/locomotiv/src/Node/Conv2D.cpp [new file with mode: 0644]
contrib/locomotiv/src/Node/Conv2D.test.cpp [new file with mode: 0644]

index 37f3b8a..1a39639 100644 (file)
@@ -5,6 +5,7 @@
 // NODE(Name) : alphabetic order please
 
 NODE(ConstGen)
+NODE(Conv2D)
 NODE(FeatureDecode)
 NODE(FeatureEncode)
 NODE(FilterEncode)
diff --git a/contrib/locomotiv/src/Node/Conv2D.cpp b/contrib/locomotiv/src/Node/Conv2D.cpp
new file mode 100644 (file)
index 0000000..dbfe52d
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <limits>
+#include <cassert>
+#include <algorithm>
+#include <stdexcept>
+
+namespace
+{
+
/// @brief Output extent of a VALID (no padding) convolution along one axis
inline uint32_t compute_out_size(uint32_t image_size, uint32_t filter_size, uint32_t stride)
{
  // Number of filter placements: one at offset 0, plus one per full stride step
  const uint32_t span = image_size - filter_size;
  return span / stride + 1;
}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Calculates Conv2D
+ * @note  Both input_buf and filter_buf have NHWC format
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input_buf,
+                          const Buffer<FIL_T> *filter_buf)
+{
+  // TODO Consider complex padding. This version works only when padding = {0,0,0,0}
+  auto pad = conv2d->pad();
+  assert(pad->bottom() == 0 && pad->top() == 0 && pad->right() == 0 && pad->left() == 0);
+
+  auto input_shape = input_buf->shape();
+  auto filter_shape = filter_buf->shape();
+
+  assert(input_shape.rank() == 4);
+  assert(filter_shape.rank() == 4);
+  assert(input_shape.dim(3) == filter_shape.dim(3)); // should have same channel values
+
+  const uint32_t input_height = input_shape.dim(1);
+  const uint32_t input_width = input_shape.dim(2);
+
+  const uint32_t filter_height = filter_shape.dim(1);
+  const uint32_t filter_width = filter_shape.dim(2);
+
+  const uint32_t stride_width = conv2d->stride()->horizontal();
+  const uint32_t stride_height = conv2d->stride()->vertical();
+
+  // loco invariant
+  assert((input_height - filter_height) % stride_height == 0);
+  assert((input_width - filter_width) % stride_width == 0);
+
+  // TODO Enable dilations. Let's set these to 1 for now.
+  const uint32_t dilation_width_factor = 1;
+  const uint32_t dilation_height_factor = 1;
+
+  // TODO enable padding.
+  const uint32_t pad_width = 0;  // with padding, pad_width = conv2d->pad()->left();
+  const uint32_t pad_height = 0; // with padding, pad_height = conv2d->pad()->top();
+
+  const uint32_t output_height = compute_out_size(input_height, filter_height, stride_height);
+  const uint32_t output_width = compute_out_size(input_width, filter_width, stride_width);
+
+  const uint32_t batches = input_shape.dim(0);
+  const uint32_t input_depth = input_shape.dim(3);
+  const uint32_t output_depth = filter_shape.dim(0);
+
+  Shape output_shape{batches, output_height, output_width, output_depth};
+  auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
+  for (uint32_t batch = 0; batch < batches; ++batch)
+  {
+    for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+    {
+      for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+      {
+        for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+        {
+          const int in_x_origin = (out_x * stride_width) - pad_width;
+          const int in_y_origin = (out_y * stride_height) - pad_height;
+
+          RET_T total = static_cast<RET_T>(0);
+
+          for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+          {
+            for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+            {
+              for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+              {
+                const uint32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+                const uint32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value.
+                if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height))
+                {
+                  auto input_value = input_buf->at(Index({batch, in_y, in_x, in_channel}));
+                  auto filter_value =
+                      filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+                  total += (input_value * filter_value);
+                }
+              }
+            }
+          }
+          output_buf.at(Index({batch, out_y, out_x, out_channel})) = total;
+        }
+      }
+    }
+  }
+  return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d)
+{
+  auto ifm_data = annot_data(conv2d->ifm());
+  auto ker_data = annot_data(conv2d->ker());
+
+  if (!ifm_data || ifm_data->shape()->rank() != 4)
+  {
+    throw std::runtime_error("Can't find input data of Conv2D");
+  }
+  if (!ker_data || ker_data->shape()->rank() != 4)
+  {
+    throw std::runtime_error("Can't find kernel data of Conv2D");
+  }
+
+  if (annot_domain(conv2d->ifm()) != loco::Domain::Feature)
+  {
+    throw std::runtime_error("IFM of Conv2D is not feature");
+  }
+
+  if (annot_domain(conv2d->ker()) != loco::Domain::Filter)
+  {
+    throw std::runtime_error("Kernel of Conv2D is not filter");
+  }
+
+  std::unique_ptr<NodeData> conv2d_result = nullptr;
+
+  if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+  {
+    auto ifm_buf = ifm_data->as_f32_bufptr();
+    auto ker_buf = ker_data->as_f32_bufptr();
+
+    auto conv2d_buf = calc_conv2D<float, float, float>(conv2d, ifm_buf, ker_buf);
+
+    conv2d_result = make_data(conv2d_buf);
+  }
+  else
+    throw std::runtime_error("NYI for these DataTypes");
+
+  assert(conv2d_result != nullptr);
+
+  erase_annot_data(conv2d);
+  annot_data(conv2d, std::move(conv2d_result));
+  annot_domain(conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
diff --git a/contrib/locomotiv/src/Node/Conv2D.test.cpp b/contrib/locomotiv/src/Node/Conv2D.test.cpp
new file mode 100644 (file)
index 0000000..9f50ae7
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+// TODO Add test for padding
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape ker_shape, const Shape ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h)
+{
+  auto g = loco::make_graph();
+
+  // Fill output data of FeatureEncode from ifm
+  auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+  {
+    auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+    auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ifm_enc_buf);
+    locomotiv::annot_data(ifm_enc, std::move(enc_data));
+    locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+  }
+
+  // Fill output data of FilterEncode from ker
+  auto ker_enc = g->nodes()->create<loco::FilterEncode>();
+  {
+    auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+    auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+    for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+    {
+      const auto &ind = e.current();
+      ker_enc_buf.at(ind) = ker_overlay.at(ind);
+    }
+
+    auto enc_data = locomotiv::make_data(ker_enc_buf);
+    locomotiv::annot_data(ker_enc, std::move(enc_data));
+    locomotiv::annot_domain(ker_enc, loco::Domain::Filter);
+  }
+
+  // build Conv2D
+  auto conv2d = g->nodes()->create<loco::Conv2D>();
+  conv2d->ifm(ifm_enc);
+  conv2d->ker(ker_enc);
+  conv2d->stride()->vertical(stride_v);
+  conv2d->stride()->horizontal(stride_h);
+
+  // run interpreter
+  locomotiv::NodeExecution::get().run(conv2d);
+
+  // get result of calculation
+  auto conv2d_result = locomotiv::annot_data(conv2d);
+
+  // check the result
+  ASSERT_NE(conv2d_result, nullptr);
+  ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32);
+  ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
+
+  auto ofm_overlay =
+      make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+  for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+  {
+    const auto &ind = e.current();
+    ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+  }
+
+  ASSERT_EQ(locomotiv::annot_domain(conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm and ofm are from the code below:
+
+ifm = tf.random_normal([1, 5, 5, 1], stddev=1)
+ker = tf.random_normal([3, 3, 1, 1], stddev=1)
+out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+    print(sess.run(out))
+*/
+
// Single-channel 5x5 input, 3x3 kernel, stride 2, VALID padding.
// Expected values were captured from the TensorFlow session shown above.
TEST(NodeExecution_Conv2D, f32_1x5x5x1_calculation)
{
  using nncc::core::ADT::tensor::Shape;

  // 1x5x5x1 input feature map (NHWC), row-major
  const float ifm[] =
  {
    -0.48850584,  1.4292705,  -1.3424522, -0.7441476,  -1.8964586,
     1.7021934,  -0.39246717,  0.6248314,  0.12724274,  1.3915083,
     0.382255,    0.7725081,   0.9171561, -1.1847119,   0.61858755,
     1.1530193,  -0.476239,   -0.9038663, -0.48764458,  0.339963,
     2.2817912,  -0.8464133,  -1.0598192,  0.8361126,   1.2344601
  };

  // Kernel values; passed to run_test with shape {1, 3, 3, 1} (NHWC)
  const float ker[] =
  {
    -0.0830195,  0.21088193, -0.11781317,
     0.07755677, 1.6337638,   1.0792778,
    -1.6922939, -1.5437212,   0.96667504
  };

  // Expected 1x2x2x1 output from TensorFlow
  const float ofm[] =
  {
    -0.28752697, 2.8108592,
    -5.220376  , 0.7973861
  };

  run_test(ifm, ker, ofm,
           Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // shapes of input, ker, output
           2, 2  // stride
  );
}
+
+TEST(NodeExecution_Conv2D, f32_multiple_channel)
+{
+  // testing channel != 1, stride = [1,1]
+  using nncc::core::ADT::tensor::Shape;
+
+  float ifm[1*5*5*3];
+  for (int n = 0; n < 5*5*3; n++) ifm[n] = 2.2;
+
+  float ker[2*2*2*3]; // nhwc
+  for (int n = 0; n < 2*2*2*3; n++) ker[n] = 1.1;
+
+  float ofm[1*4*4*2];
+  for (int n = 0; n < 1*4*4*2; n++) ofm[n] = 29.04;
+
+  run_test(ifm, ker, ofm,
+           Shape{1, 5, 5, 3}, Shape{2, 2, 2, 3}, Shape{1, 4, 4, 2}, // shapes of input, ker, output
+           1, 1  // stride
+  );
+}
+// clang-format on