--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Index.h>
+#include <nncc/core/ADT/tensor/IndexEnumerator.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+
+#include <cassert>
+#include <memory>
+#include <stdexcept>
+#include <utility>
+
+namespace
+{
+
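+// Output size along one axis under 'VALID' (i.e. zero) padding:
+//   out = (image_size - filter_size) / stride + 1
+// which the single integer division below computes as
+// (image_size + stride - filter_size) / stride.
+// Example: image 5, filter 3, stride 2 -> (5 + 2 - 3) / 2 = 2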
+inline uint32_t compute_out_size(uint32_t image_size, uint32_t filter_size, uint32_t stride)
+{
+ return (image_size + stride - filter_size) / stride;
+}
+
+using nncc::core::ADT::tensor::Buffer;
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::Index;
+using nncc::core::ADT::tensor::IndexEnumerator;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+
+/**
+ * @brief Computes 2D convolution ('VALID' padding only for now)
+ * @note Both input_buf and filter_buf are in NHWC layout; for filter_buf,
+ *       the N axis indexes the output channels (one filter per output channel)
+ */
+template <typename RET_T, typename IFM_T, typename FIL_T>
+Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input_buf,
+ const Buffer<FIL_T> *filter_buf)
+{
+ // TODO Consider complex padding. This version works only when padding = {0,0,0,0}
+ auto pad = conv2d->pad();
+ assert(pad->bottom() == 0 && pad->top() == 0 && pad->right() == 0 && pad->left() == 0);
+
+ auto input_shape = input_buf->shape();
+ auto filter_shape = filter_buf->shape();
+
+ assert(input_shape.rank() == 4);
+ assert(filter_shape.rank() == 4);
+  assert(input_shape.dim(3) == filter_shape.dim(3)); // input and filter must share channel depth
+
+ const uint32_t input_height = input_shape.dim(1);
+ const uint32_t input_width = input_shape.dim(2);
+
+ const uint32_t filter_height = filter_shape.dim(1);
+ const uint32_t filter_width = filter_shape.dim(2);
+
+ const uint32_t stride_width = conv2d->stride()->horizontal();
+ const uint32_t stride_height = conv2d->stride()->vertical();
+
+ // loco invariant
+ assert((input_height - filter_height) % stride_height == 0);
+ assert((input_width - filter_width) % stride_width == 0);
+
+ // TODO Enable dilations. Let's set these to 1 for now.
+ const uint32_t dilation_width_factor = 1;
+ const uint32_t dilation_height_factor = 1;
+
+  // TODO Enable padding.
+ const uint32_t pad_width = 0; // with padding, pad_width = conv2d->pad()->left();
+ const uint32_t pad_height = 0; // with padding, pad_height = conv2d->pad()->top();
+
+ const uint32_t output_height = compute_out_size(input_height, filter_height, stride_height);
+ const uint32_t output_width = compute_out_size(input_width, filter_width, stride_width);
+
+ const uint32_t batches = input_shape.dim(0);
+ const uint32_t input_depth = input_shape.dim(3);
+ const uint32_t output_depth = filter_shape.dim(0);
+
+ Shape output_shape{batches, output_height, output_width, output_depth};
+ auto output_buf = make_buffer<RET_T, LexicalLayout>(output_shape);
+
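+  // For each output element, accumulate input * filter products over the
+  // filter window and all input channels (the loop structure follows the
+  // TensorFlow reference kernel this file is derived from)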
+ for (uint32_t batch = 0; batch < batches; ++batch)
+ {
+ for (uint32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (uint32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (uint32_t out_channel = 0; out_channel < output_depth; ++out_channel)
+ {
+ const int in_x_origin = (out_x * stride_width) - pad_width;
+ const int in_y_origin = (out_y * stride_height) - pad_height;
+
+ RET_T total = static_cast<RET_T>(0);
+
+ for (uint32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (uint32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ for (uint32_t in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+                const int in_x = in_x_origin + static_cast<int>(dilation_width_factor * filter_x);
+                const int in_y = in_y_origin + static_cast<int>(dilation_height_factor * filter_y);
+
+                // If the location is outside the bounds of the input image,
+                // use zero as a default value. in_x/in_y are kept signed since
+                // they may become negative once padding is enabled.
+                if ((in_x >= 0) && (static_cast<uint32_t>(in_x) < input_width) && (in_y >= 0) &&
+                    (static_cast<uint32_t>(in_y) < input_height))
+                {
+                  const uint32_t x = static_cast<uint32_t>(in_x);
+                  const uint32_t y = static_cast<uint32_t>(in_y);
+                  auto input_value = input_buf->at(Index({batch, y, x, in_channel}));
+ auto filter_value =
+ filter_buf->at(Index({out_channel, filter_y, filter_x, in_channel}));
+ total += (input_value * filter_value);
+ }
+ }
+ }
+ }
+ output_buf.at(Index({batch, out_y, out_x, out_channel})) = total;
+ }
+ }
+ }
+ }
+ return output_buf;
+}
+
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d)
+{
+ auto ifm_data = annot_data(conv2d->ifm());
+ auto ker_data = annot_data(conv2d->ker());
+
+  if (!ifm_data || ifm_data->shape()->rank() != 4)
+  {
+    throw std::runtime_error("Input data of Conv2D is missing or not rank-4");
+  }
+  if (!ker_data || ker_data->shape()->rank() != 4)
+  {
+    throw std::runtime_error("Kernel data of Conv2D is missing or not rank-4");
+  }
+
+ if (annot_domain(conv2d->ifm()) != loco::Domain::Feature)
+ {
+ throw std::runtime_error("IFM of Conv2D is not feature");
+ }
+
+ if (annot_domain(conv2d->ker()) != loco::Domain::Filter)
+ {
+ throw std::runtime_error("Kernel of Conv2D is not filter");
+ }
+
+ std::unique_ptr<NodeData> conv2d_result = nullptr;
+
+ if (ifm_data->dtype() == loco::DataType::FLOAT32 && ker_data->dtype() == loco::DataType::FLOAT32)
+ {
+ auto ifm_buf = ifm_data->as_f32_bufptr();
+ auto ker_buf = ker_data->as_f32_bufptr();
+
+ auto conv2d_buf = calc_conv2D<float, float, float>(conv2d, ifm_buf, ker_buf);
+
+ conv2d_result = make_data(conv2d_buf);
+ }
+  else
+  {
+    throw std::runtime_error("NYI for these DataTypes");
+  }
+
+ assert(conv2d_result != nullptr);
+
+ erase_annot_data(conv2d);
+ annot_data(conv2d, std::move(conv2d_result));
+ annot_domain(conv2d, loco::Domain::Feature);
+}
+
+} // namespace locomotiv
--- /dev/null
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NodeExecution.h"
+
+#include "locomotiv/NodeData.h"
+#include "NodeDataImpl.h"
+#include "NodeDomain.h"
+
+#include <nncc/core/ADT/tensor/Shape.h>
+#include <nncc/core/ADT/tensor/Buffer.h>
+#include <nncc/core/ADT/tensor/Overlay.h>
+#include <nncc/core/ADT/tensor/LexicalLayout.h>
+#include "nncc/core/ADT/tensor/IndexEnumerator.h"
+
+#include <gtest/gtest.h>
+
+namespace
+{
+using nncc::core::ADT::tensor::Shape;
+using nncc::core::ADT::tensor::LexicalLayout;
+using nncc::core::ADT::tensor::make_buffer;
+using nncc::core::ADT::tensor::make_overlay;
+
+// TODO Add test for padding
+void run_test(const float *ifm, const float *ker, const float *expected_ofm, const Shape &ifm_shape,
+              const Shape &ker_shape, const Shape &ofm_shape, const uint32_t stride_v,
+              const uint32_t stride_h)
+{
+ auto g = loco::make_graph();
+
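+  // locomotiv reads operand values from NodeData annotations, so instead of
+  // running real encoders this test annotates hand-filled buffers directly
+  // onto the FeatureEncode/FilterEncode nodes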
+ // Fill output data of FeatureEncode from ifm
+ auto ifm_enc = g->nodes()->create<loco::FeatureEncode>();
+ {
+ auto ifm_enc_buf = make_buffer<float, LexicalLayout>(ifm_shape);
+ auto ifm_overlay = make_overlay<float, LexicalLayout>(ifm_shape, const_cast<float *>(ifm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ifm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ ifm_enc_buf.at(ind) = ifm_overlay.at(ind);
+ }
+
+ auto enc_data = locomotiv::make_data(ifm_enc_buf);
+ locomotiv::annot_data(ifm_enc, std::move(enc_data));
+ locomotiv::annot_domain(ifm_enc, loco::Domain::Feature);
+ }
+
+ // Fill output data of FilterEncode from ker
+ auto ker_enc = g->nodes()->create<loco::FilterEncode>();
+ {
+ auto ker_enc_buf = make_buffer<float, LexicalLayout>(ker_shape);
+ auto ker_overlay = make_overlay<float, LexicalLayout>(ker_shape, const_cast<float *>(ker));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ker_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ ker_enc_buf.at(ind) = ker_overlay.at(ind);
+ }
+
+ auto enc_data = locomotiv::make_data(ker_enc_buf);
+ locomotiv::annot_data(ker_enc, std::move(enc_data));
+ locomotiv::annot_domain(ker_enc, loco::Domain::Filter);
+ }
+
+ // build Conv2D
+ auto conv2d = g->nodes()->create<loco::Conv2D>();
+ conv2d->ifm(ifm_enc);
+ conv2d->ker(ker_enc);
+ conv2d->stride()->vertical(stride_v);
+ conv2d->stride()->horizontal(stride_h);
+
+ // run interpreter
+ locomotiv::NodeExecution::get().run(conv2d);
+
+ // get result of calculation
+ auto conv2d_result = locomotiv::annot_data(conv2d);
+
+ // check the result
+ ASSERT_NE(conv2d_result, nullptr);
+ ASSERT_TRUE(conv2d_result->dtype() == loco::DataType::FLOAT32);
+ ASSERT_TRUE(*(conv2d_result->shape()) == ofm_shape);
+
+ auto ofm_overlay =
+ make_overlay<float, LexicalLayout>(ofm_shape, const_cast<float *>(expected_ofm));
+ for (nncc::core::ADT::tensor::IndexEnumerator e{ofm_shape}; e.valid(); e.advance())
+ {
+ const auto &ind = e.current();
+ ASSERT_FLOAT_EQ(conv2d_result->as_f32_bufptr()->at(ind), ofm_overlay.at(ind));
+ }
+
+ ASSERT_EQ(locomotiv::annot_domain(conv2d), loco::Domain::Feature);
+}
+
+} // namespace
+
+// clang-format off
+/* ifm, ker and ofm below were captured from one run of this TensorFlow (v1.x) code:
+
+ifm = tf.random_normal([1, 5, 5, 1], stddev=1)
+ker = tf.random_normal([3, 3, 1, 1], stddev=1)
+out = tf.nn.conv2d(ifm, ker, strides = [1, 2, 2, 1], padding= 'VALID')
+
+with tf.Session() as sess:
+ print(sess.run(out))
+*/
+
+TEST(NodeExecution_Conv2D, f32_1x5x5x1_calculation)
+{
+ using nncc::core::ADT::tensor::Shape;
+
+ const float ifm[] =
+ {
+ -0.48850584, 1.4292705, -1.3424522, -0.7441476, -1.8964586,
+ 1.7021934, -0.39246717, 0.6248314, 0.12724274, 1.3915083,
+ 0.382255, 0.7725081, 0.9171561, -1.1847119, 0.61858755,
+ 1.1530193, -0.476239, -0.9038663, -0.48764458, 0.339963,
+ 2.2817912, -0.8464133, -1.0598192, 0.8361126, 1.2344601
+ };
+
+ const float ker[] =
+ {
+ -0.0830195, 0.21088193, -0.11781317,
+ 0.07755677, 1.6337638, 1.0792778,
+ -1.6922939, -1.5437212, 0.96667504
+ };
+
+ const float ofm[] =
+ {
+ -0.28752697, 2.8108592,
+ -5.220376 , 0.7973861
+ };
+
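+  // ofm spatial size per axis: (5 + 2 - 3) / 2 = 2, hence Shape{1, 2, 2, 1}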
+ run_test(ifm, ker, ofm,
+ Shape{1, 5, 5, 1}, Shape{1, 3, 3, 1}, Shape{1, 2, 2, 1}, // shapes of input, ker, output
+ 2, 2 // stride
+ );
+}
+
+TEST(NodeExecution_Conv2D, f32_multiple_channel)
+{
+ // testing channel != 1, stride = [1,1]
+ using nncc::core::ADT::tensor::Shape;
+
+ float ifm[1*5*5*3];
+ for (int n = 0; n < 5*5*3; n++) ifm[n] = 2.2;
+
+ float ker[2*2*2*3]; // nhwc
+ for (int n = 0; n < 2*2*2*3; n++) ker[n] = 1.1;
+
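+  // each output element accumulates 2*2*3 = 12 products of 2.2 * 1.1 = 2.42,
+  // so every expected value is 12 * 2.42 = 29.04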
+ float ofm[1*4*4*2];
+ for (int n = 0; n < 1*4*4*2; n++) ofm[n] = 29.04;
+
+ run_test(ifm, ker, ofm,
+ Shape{1, 5, 5, 3}, Shape{2, 2, 2, 3}, Shape{1, 4, 4, 2}, // shapes of input, ker, output
+ 1, 1 // stride
+ );
+}
+// clang-format on