From 92c0e93c6b2dc2bf166b5b0e30c47f204b20e209 Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Thu, 17 Sep 2020 19:43:48 +0900
Subject: [PATCH] [Tensor] Fix bug in broadcast support

Fix a bug where strides and the buffer axis were miscalculated

**Changes proposed in this PR:**
- Clarify the consecutive-one strategy and the same-stride strategy
- Change the last stride to 0 only when the consecutive-one strategy is used
- Add a regression test

Resolves: #559

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Cc: Jijoong Moon
Signed-off-by: Jihoon Lee
---
 nntrainer/src/tensor.cpp                    | 64 ++++++++++++++++++++---------
 test/include/nntrainer_test_util.h          |  7 ++++
 test/nntrainer_test_util.cpp                | 17 ++++++--
 test/unittest/unittest_nntrainer_tensor.cpp | 45 +++++++++++++++++++-
 4 files changed, 108 insertions(+), 25 deletions(-)

diff --git a/nntrainer/src/tensor.cpp b/nntrainer/src/tensor.cpp
index 6787f34..4b4cbc2 100644
--- a/nntrainer/src/tensor.cpp
+++ b/nntrainer/src/tensor.cpp
@@ -243,6 +243,17 @@ int Tensor::operator_i(
   std::function v_func) {
 
   BroadcastInfo e;
+
+  /// shortcut to cover when dimension matches
+  /// note that buffer_size and the last stride are only used in v_func, but
+  /// they might be changed
+  if (dim == m.dim) {
+    e.buffer_size = length();
+    e.strides[3] = 1;
+    v_func(e, getData(), m.getData());
+    return ML_ERROR_NONE;
+  }
+
   try {
     e = this->computeBroadcastInfo(m);
   } catch (std::exception &err) {
@@ -784,13 +795,17 @@ BroadcastInfo Tensor::computeBroadcastInfo(const Tensor &m) {
 
   BroadcastInfo e;
 
+  /// check if the given Tensors can be broadcast
   for (unsigned int i = 0; i < MAXDIM; ++i) {
-    if (m_dim.getTensorDim(i) == 1) {
+    if (dim.getTensorDim(i) == m_dim.getTensorDim(i)) {
+      e.strides[i] = m.strides[i];
       continue;
     }
 
-    if (dim.getTensorDim(i) == m_dim.getTensorDim(i)) {
-      e.strides[i] = m.strides[i];
+    /// If the given dimension is 1, it can be reused, the stride remaining 0.
+    /// Need to check if dim[i] == 1 && m_dim[i] == 1 first though;
+    /// if so, strides should not change
+    if (m_dim.getTensorDim(i) == 1) {
       continue;
     }
 
@@ -802,30 +817,41 @@ BroadcastInfo Tensor::computeBroadcastInfo(const Tensor &m) {
   }
 
   /// calculate inner loop size
-  unsigned int inner_loop_size = dim.getTensorDim(3);
-
-  bool first_dim_none = m_dim.getTensorDim(3) == 1 && dim.getTensorDim(3) != 1;
+  e.buffer_size = 1;
   e.buffer_axis = -1;
-  for (int axis = 2; axis >= 0; --axis) {
-    unsigned int cur_dim = m_dim.getTensorDim(axis);
+  e.strides[3] = m.strides[3];
 
-    if (first_dim_none) {
-      if (cur_dim != 1) {
-        e.buffer_axis = axis;
-        break;
-      }
-    } else {
-      if (cur_dim == 1 && dim.getTensorDim(axis) != m_dim.getTensorDim(axis)) {
-        e.buffer_axis = axis;
+  /// initialize buffer info with the matching-dimension strategy
+  for (int axis = 3; axis >= 0; --axis) {
+    if (dim.getTensorDim(axis) != m_dim.getTensorDim(axis)) {
+      e.buffer_axis = axis;
+      break;
+    }
+
+    e.buffer_size *= dim.getTensorDim(axis);
+  }
+
+  /// check the strategy that uses consecutive ones
+  if (m_dim.getTensorDim(3) == 1) {
+    unsigned int inner_loop_size = 1;
+    int axis;
+    for (axis = 3; axis >= 0; --axis) {
+      if (m_dim.getTensorDim(axis) != 1) {
         break;
       }
+
+      inner_loop_size *= dim.getTensorDim(axis);
     }
 
-    inner_loop_size *= dim.getTensorDim(axis);
+    /// if the consecutive-one strategy has a bigger chunk size, replace the
+    /// information
+    if (inner_loop_size > e.buffer_size) {
+      e.buffer_axis = axis;
+      e.buffer_size = inner_loop_size;
+      e.strides[3] = 0;
+    }
   }
-  e.buffer_size = inner_loop_size;
-
   return e;
 }
 
 } /* namespace nntrainer */
diff --git a/test/include/nntrainer_test_util.h b/test/include/nntrainer_test_util.h
index 471b000..3b52ba1 100644
--- a/test/include/nntrainer_test_util.h
+++ b/test/include/nntrainer_test_util.h
@@ -368,6 +368,13 @@ nntrainer::Tensor ranged(unsigned int batch, unsigned channel, unsigned height,
                          unsigned width);
 
 /**
+ * @brief return a tensor filled with random value with given dimension
+ */
+nntrainer::Tensor randUniform(unsigned int batch, unsigned channel,
+                              unsigned height, unsigned width, float min = -1,
+                              float max = 1);
+
+/**
  * @brief replace string and save in file
  * @param[in] from string to be replaced
  * @param[in] to string to repalce with
diff --git a/test/nntrainer_test_util.cpp b/test/nntrainer_test_util.cpp
index 7134aa7..abec18b 100644
--- a/test/nntrainer_test_util.cpp
+++ b/test/nntrainer_test_util.cpp
@@ -263,21 +263,30 @@ int getBatch_val(float **outVec, float **outLabel, bool *last,
 /**
  * @brief return a tensor filled with contant value with dimension
  */
-nntrainer::Tensor constant(float value, unsigned int batch, unsigned channel,
-                           unsigned height, unsigned width) {
+nntrainer::Tensor constant(float value, unsigned int batch,
+                           unsigned int channel, unsigned int height,
+                           unsigned int width) {
   nntrainer::Tensor t(batch, channel, height, width);
   t.setValue(value);
 
   return t;
 }
 
-nntrainer::Tensor ranged(unsigned int batch, unsigned channel, unsigned height,
-                         unsigned width) {
+nntrainer::Tensor ranged(unsigned int batch, unsigned int channel,
+                         unsigned int height, unsigned int width) {
   nntrainer::Tensor t(batch, channel, height, width);
   unsigned int i = 0;
   return t.apply([&](float in) { return i++; });
 }
 
+nntrainer::Tensor randUniform(unsigned int batch, unsigned int channel,
+                              unsigned int height, unsigned int width,
+                              float min, float max) {
+  nntrainer::Tensor t(batch, channel, height, width);
+  t.setRandUniform(min, max);
+  return t;
+}
+
 void IniSection::setEntry(const std::string &entry_str) {
   // setting property separated by "|"
   std::regex words_regex("[^|]+");
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp
index 648552e..04d9d39 100644
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -1161,6 +1161,27 @@ TEST(nntrainer_Tensor, add_i_broadcast_01_p) {
     EXPECT_EQ(status, ML_ERROR_NONE);
     EXPECT_EQ(t, answer);
   }
+  {
+    nntrainer::TensorDim ref_dim(1, 1, 2, 1);
+    nntrainer::Tensor t = ranged(1, 1, 2, 1);
+    nntrainer::Tensor m = ranged(1, 1, 2, 1);
+    float answer_data[] = {0.0, 2.0};
+    nntrainer::Tensor answer(ref_dim, answer_data);
+    int status = t.add_i(m);
+    EXPECT_EQ(status, ML_ERROR_NONE);
+    EXPECT_EQ(t, answer);
+  }
+  {
+    nntrainer::TensorDim ref_dim(16, 1, 1, 1);
+    nntrainer::Tensor t = ranged(16, 1, 1, 1);
+    nntrainer::Tensor m = ranged(1, 1, 1, 1);
+    float answer_data[] = {0.0, 1.0, 2.0,  3.0,  4.0,  5.0,  6.0,  7.0,
+                           8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
+    nntrainer::Tensor answer(ref_dim, answer_data);
+    int status = t.add_i(m);
+    EXPECT_EQ(status, ML_ERROR_NONE);
+    EXPECT_EQ(t, answer);
+  }
 }
 
 TEST(nntrainer_Tensor, add_i_broadcast_not_supported_01_n) {
@@ -1940,8 +1961,8 @@ TEST(nntrainer_Tensor, print_large_size) {
 }
 
 TEST(nntrainer_Tensor, DISABLED_broadcast_info_n) {
-  nntrainer::Tensor t = ranged(3, 5, 1, 4);
-  nntrainer::Tensor b = ranged(1, 5, 1, 4);
+  nntrainer::Tensor t = ranged(1, 1, 2, 1);
+  nntrainer::Tensor b = ranged(1, 1, 2, 1);
 
   auto vector_func = [](float *buf, int stride, int size) {
     float *cur = buf;
@@ -2020,6 +2041,26 @@ TEST(nntrainer_Tensor, DISABLED_broadcast_info_n) {
   std::cerr << "buffer_size: " << e.buffer_size << std::endl;
 }
 
+TEST(nntrainer_Tensor, DISABLED_equation_test_01_p) {
+  nntrainer::Tensor a, b, c;
+  nntrainer::Tensor ret1, ret2;
+
+  a = randUniform(4, 6, 7, 3, -100, 100);
+  b = randUniform(4, 6, 7, 3, -100, 100);
+  c = randUniform(4, 6, 7, 3, -100, 100);
+
+  ret1 = a.subtract(b).multiply(c);
+  ret2 = a.multiply(c).subtract(b.multiply(c));
+
+  float *data1 = ret1.getData();
+  float *data2 = ret2.getData();
+  EXPECT_EQ(ret1, ret2);
+
+  for (unsigned int i = 0; i < ret1.length(); ++i) {
+    EXPECT_FLOAT_EQ(data1[i], data2[i]);
+  }
+}
+
 /**
  * @brief Main gtest
  */
-- 
2.7.4
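
For reference, here is a minimal standalone sketch of how the two strategies named in the commit message compete, assuming 4-D dimensions. `BufInfo`, `pickBuffer`, and the plain `std::array` arguments are illustrative stand-ins rather than nntrainer API; only the roles of `buffer_size`, `buffer_axis`, and the last stride follow the `computeBroadcastInfo()` hunk in the patch above.

```cpp
// Illustrative sketch only: names and types here are hypothetical,
// not part of nntrainer.
#include <array>
#include <cstdio>

struct BufInfo {
  unsigned int buffer_size = 1; // elements handled per inner-loop call
  int buffer_axis = -1;         // outermost axis covered by one buffer
  unsigned int last_stride = 1; // stride of m along the innermost axis
};

BufInfo pickBuffer(const std::array<unsigned int, 4> &dim,
                   const std::array<unsigned int, 4> &m_dim) {
  BufInfo e;

  // same-stride strategy: grow the buffer while the dimensions match
  for (int axis = 3; axis >= 0; --axis) {
    if (dim[axis] != m_dim[axis]) {
      e.buffer_axis = axis;
      break;
    }
    e.buffer_size *= dim[axis];
  }

  // consecutive-one strategy: while m's trailing dimensions are all 1,
  // a single value of m covers the whole chunk (stride 0)
  if (m_dim[3] == 1) {
    unsigned int chunk = 1;
    int axis;
    for (axis = 3; axis >= 0; --axis) {
      if (m_dim[axis] != 1)
        break;
      chunk *= dim[axis];
    }
    if (chunk > e.buffer_size) { // keep whichever strategy gives the bigger chunk
      e.buffer_axis = axis;
      e.buffer_size = chunk;
      e.last_stride = 0;
    }
  }
  return e;
}

int main() {
  // (3,5,1,4) with (1,5,1,4): same-stride wins, one buffer spans 5*1*4 = 20 elements
  BufInfo a = pickBuffer({3, 5, 1, 4}, {1, 5, 1, 4});
  // (16,1,1,1) with (1,1,1,1): consecutive-one wins, one scalar of m covers all 16
  BufInfo b = pickBuffer({16, 1, 1, 1}, {1, 1, 1, 1});
  std::printf("a: size %u axis %d stride %u\n", a.buffer_size, a.buffer_axis,
              a.last_stride);
  std::printf("b: size %u axis %d stride %u\n", b.buffer_size, b.buffer_axis,
              b.last_stride);
  return 0;
}
```

Taking whichever strategy yields the larger chunk keeps the element-wise inner loop as long as possible, and the last stride is zeroed only in the consecutive-one case, where a single value of `m` is reused across the whole chunk. The second example mirrors the (16, 1, 1, 1) regression test added in this patch.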