From 6c2b586232d2a2891baedf8b64a64303b8b37dc9 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Thu, 10 Aug 2023 10:29:44 +0900
Subject: [PATCH] [Bug] Fix the nhwc test bug

We need to add the format information during the layer tests. This PR
adds the format change for the input tensor.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 nntrainer/layers/fc_layer.cpp                  |  9 ++--
 nntrainer/tensor/tensor.h                      | 51 ++++++++++------------
 test/unittest/layers/layers_golden_tests.cpp   | 51 ++++++++++++----------
 .../layers/unittest_layers_fully_connected.cpp |  2 +
 4 files changed, 59 insertions(+), 54 deletions(-)

diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index 9a87e04..a9f8dd0 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -77,14 +77,15 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) {
   // @todo : This NCHW format setting is just temporal, it needs to be set by
   // global configuration
   TensorDim bias_dim(
-    1, 1, 1, unit,
+    1, is_nchw ? 1 : unit, 1, is_nchw ? unit : 1,
     TensorDim::TensorType(context.getFormat(), context.getWeightDataType()),
-    0b0001);
+    is_nchw ? 0b0001 : 0b0100);
 
   TensorDim weight_dim(
-    1, 1, in_dim.width(), unit,
+    1, is_nchw ? 1 : unit, is_nchw ? in_dim.width() : 1,
+    is_nchw ? unit : in_dim.channel(),
     TensorDim::TensorType(context.getFormat(), context.getWeightDataType()),
-    0b0011);
+    is_nchw ? 0b0011 : 0b0101);
 
   weight_idx[FCParams::weight] = context.requestWeight(
     weight_dim, weight_initializer, weight_regularizer,
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 23b5dcc..f8618ca 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -1145,8 +1145,7 @@ public:
    * @param f function to apply
    * @return int ML_ERROR_NONE if successful
    */
-  template 
-  int apply_i(std::function f) {
+  template  int apply_i(std::function f) {
     Tensor result = *this;
 
     apply(f, result);
@@ -1158,8 +1157,7 @@ public:
    * @param[in] *function function pointer applied
    * @retval    Tensor
    */
-  template 
-  Tensor apply(std::function f) const {
+  template  Tensor apply(std::function f) const {
     Tensor result;
     return apply(f, result);
   };
@@ -1215,8 +1213,8 @@ public:
 
   //     std::transform(data, data + size(), rdata, f);
   //   } else if (strides[3] == 1 && output.strides[3] == 1) {
-  //     /** @todo optimize this with combining these loops where stride is 1 */
-  //     for (unsigned int b = 0; b < batch(); ++b) {
+  //     /** @todo optimize this with combining these loops where stride is 1
+  //     */ for (unsigned int b = 0; b < batch(); ++b) {
   //       for (unsigned int c = 0; c < channel(); ++c) {
   //         for (unsigned int h = 0; h < height(); ++h) {
   //           float *out_data = output.getAddress(b, c, h, 0);
@@ -1245,16 +1243,14 @@ public:
 
   //   // std::function<_FP16(_FP16)> f_16 =
   //   //   static_cast>(f);
-
-
   //   if (contiguous && output.contiguous) {
   //     const _FP16 *data = (getData<_FP16>());
   //     _FP16 *rdata = (output.getData<_FP16>());
 
   //     std::transform(data, data + size(), rdata, f_16);
   //   } else if (strides[3] == 1 && output.strides[3] == 1) {
-  //     /** @todo optimize this with combining these loops where stride is 1 */
-  //     for (unsigned int b = 0; b < batch(); ++b) {
+  //     /** @todo optimize this with combining these loops where stride is 1
+  //     */ for (unsigned int b = 0; b < batch(); ++b) {
   //       for (unsigned int c = 0; c < channel(); ++c) {
   //         for (unsigned int h = 0; h < height(); ++h) {
   //           _FP16 *out_data = output.getAddress<_FP16>(b, c, h, 0);
@@ -1268,7 +1264,8 @@ public:
   //     for (unsigned int c = 0; c < channel(); ++c) {
   //       for (unsigned int h = 0; h < height(); ++h) {
   //         for (unsigned int w = 0; w < width(); ++w) {
-  //           output.setValue(b, c, h, w, f_16(getValue<_FP16>(b, c, h, w)));
+  //           output.setValue(b, c, h, w, f_16(getValue<_FP16>(b, c, h,
+  //           w)));
   //         }
   //       }
   //     }
@@ -1323,8 +1320,8 @@ public:
 
   //     std::transform(data, data + size(), rdata, f);
   //   } else if (strides[3] == 1 && output.strides[3] == 1) {
-  //     /** @todo optimize this with combining these loops where stride is 1 */
-  //     for (unsigned int b = 0; b < batch(); ++b) {
+  //     /** @todo optimize this with combining these loops where stride is 1
+  //     */ for (unsigned int b = 0; b < batch(); ++b) {
   //       for (unsigned int c = 0; c < channel(); ++c) {
   //         for (unsigned int h = 0; h < height(); ++h) {
   //           _FP16 *out_data = (_FP16 *)output.getAddress(b, c, h, 0);
@@ -1348,7 +1345,7 @@ public:
   // #else
   //   throw std::invalid_argument("Error: enable-fp16 is not enabled");
   // #endif
-  
+
   //   return output;
   // };
 
@@ -1965,20 +1962,20 @@ private:
       v_func,
     Tensor &output) const;
 
 #ifdef ENABLE_FP16
 
-  void apply_broadcast_util(
-    Tensor const &m,
-    std::function
-      v_func,
-    Tensor &output, const BroadcastInfo &e, int cur_axis = -1,
-    size_t offset = 0, size_t m_offset = 0) const;
-  void
-  apply_broadcast(Tensor const &m,
-                  std::function
-                    v_func,
-                  Tensor &output) const;
+  apply_broadcast_util(Tensor const &m,
+                       std::function
+                         v_func,
+                       Tensor &output, const BroadcastInfo &e,
+                       int cur_axis = -1, size_t offset = 0,
+                       size_t m_offset = 0) const;
+
+  void apply_broadcast(Tensor const &m,
+                       std::function
+                         v_func,
+                       Tensor &output) const;
 #endif
 
   /**
    * @brief compute Loop info for broadcasting and vectorization
diff --git a/test/unittest/layers/layers_golden_tests.cpp b/test/unittest/layers/layers_golden_tests.cpp
index bab3f3c..bf8e665 100644
--- a/test/unittest/layers/layers_golden_tests.cpp
+++ b/test/unittest/layers/layers_golden_tests.cpp
@@ -55,8 +55,11 @@ createInitContext(Layer *layer, const std::string &input_shape_str,
   std::vector parsed;
   from_string(input_shape_str, parsed);
 
-  if (tensor_type[2] == "fp16") {
-    for (auto &par : parsed) {
+  for (auto &par : parsed) {
+    par.get().setFormat(
+      str_converter::from_string(tensor_type[0]));
+    if (tensor_type[2] == "fp16") {
       par.get().setDataType(ml::train::TensorDim::DataType::FP16);
     }
   }
@@ -65,10 +68,13 @@
     "golden_test", "", 0.0, tensor_type);
   layer->finalize(context);
 
-  if (tensor_type[2] == "fp16") {
-    for (auto dim : context.getInputDimensions()) {
+  for (auto dim : context.getInputDimensions()) {
+    if (tensor_type[2] == "fp16") {
       dim.setDataType(ml::train::TensorDim::DataType::FP16);
     }
+    dim.setFormat(
+      str_converter::from_string(tensor_type[0]));
   }
 
   return context;
@@ -263,25 +269,24 @@ static void compareRunContext(RunLayerContext &rc, std::ifstream &file,
 
   constexpr bool skip_compare = true;
 
-  compare_tensors(
-    rc.getNumWeights(), [&rc](unsigned idx) { return rc.getWeight(idx); },
-    always_read, skip_compare, "initial_weights");
-  compare_tensors(
-    rc.getNumInputs(), [&rc](unsigned idx) { return rc.getInput(idx); },
-    always_read, !skip_compare, "inputs");
-  compare_tensors(
-    rc.getNumOutputs(), [&rc](unsigned idx) { return rc.getOutput(idx); },
-    always_read, !skip_compare, "outputs", match_percentage);
-  compare_tensors(
-    rc.getNumWeights(), [&rc](unsigned idx) { return rc.getWeightGrad(idx); },
-    only_read_trainable, skip_grad, "gradients");
-  compare_tensors(
-    rc.getNumWeights(), [&rc](unsigned idx) { return rc.getWeight(idx); },
-    always_read, !skip_compare, "weights");
-  compare_tensors(
-    rc.getNumInputs(),
-    [&rc](unsigned idx) { return rc.getOutgoingDerivative(idx); }, always_read,
-    skip_deriv, "derivatives", match_percentage);
+  compare_tensors(rc.getNumWeights(),
+                  [&rc](unsigned idx) { return rc.getWeight(idx); },
+                  always_read, skip_compare, "initial_weights");
+  compare_tensors(rc.getNumInputs(),
+                  [&rc](unsigned idx) { return rc.getInput(idx); }, always_read,
+                  !skip_compare, "inputs");
+  compare_tensors(rc.getNumOutputs(),
+                  [&rc](unsigned idx) { return rc.getOutput(idx); },
+                  always_read, !skip_compare, "outputs", match_percentage);
+  compare_tensors(rc.getNumWeights(),
+                  [&rc](unsigned idx) { return rc.getWeightGrad(idx); },
+                  only_read_trainable, skip_grad, "gradients");
+  compare_tensors(rc.getNumWeights(),
+                  [&rc](unsigned idx) { return rc.getWeight(idx); },
+                  always_read, !skip_compare, "weights");
+  compare_tensors(rc.getNumInputs(),
+                  [&rc](unsigned idx) { return rc.getOutgoingDerivative(idx); },
+                  always_read, skip_deriv, "derivatives", match_percentage);
 }
 
 LayerGoldenTest::~LayerGoldenTest() {}
diff --git a/test/unittest/layers/unittest_layers_fully_connected.cpp b/test/unittest/layers/unittest_layers_fully_connected.cpp
index d3db139..ae58509 100644
--- a/test/unittest/layers/unittest_layers_fully_connected.cpp
+++ b/test/unittest/layers/unittest_layers_fully_connected.cpp
@@ -65,10 +65,12 @@ auto fc_basic_plain_fp16fp16 = LayerGoldenTestParamType(
   nntrainer::createLayer, {"unit=5"}, "3:1:1:10",
   "fc_plain_fp16fp16.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT,
   "nchw", "fp16", "fp16");
+
 auto fc_basic_single_batch_fp16fp16 = LayerGoldenTestParamType(
   nntrainer::createLayer, {"unit=4"}, "1:1:1:10",
   "fc_single_batch_fp16fp16.nnlayergolden",
   LayerGoldenTestParamOptions::DEFAULT, "nchw", "fp16", "fp16");
+
 auto fc_basic_no_decay_fp16fp16 = LayerGoldenTestParamType(
   nntrainer::createLayer,
   {"unit=5", "weight_decay=0.0", "bias_decay=0.0"}, "3:1:1:10",
-- 
2.7.4
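Reviewer note (not part of the patch): the ternaries in the fc_layer.cpp hunk are easier to follow side by side, so the sketch below only mirrors how the bias/weight dimensions and the effective-axis bitmask are chosen for NCHW versus NHWC. FcDims, make_bias_dims and make_weight_dims are illustrative names, not nntrainer types, and the 10-feature / unit=5 numbers are just an example.

```cpp
// Standalone illustration of the dimension selection in the patched
// FullyConnectedLayer::finalize(); helper names here are hypothetical.
#include <cstdio>

struct FcDims {
  unsigned batch, channel, height, width;
  // Effective-axis mask as implied by the hunk:
  // 0b1000 = batch, 0b0100 = channel, 0b0010 = height, 0b0001 = width.
  unsigned eff_mask;
};

// Bias: NCHW keeps the bias on the width axis, NHWC moves it to channel.
static FcDims make_bias_dims(bool is_nchw, unsigned unit) {
  return {1u, is_nchw ? 1u : unit, 1u, is_nchw ? unit : 1u,
          is_nchw ? 0b0001u : 0b0100u};
}

// Weight: NCHW uses (height, width) = (in_width, unit),
// NHWC uses (channel, width) = (unit, in_channel).
static FcDims make_weight_dims(bool is_nchw, unsigned in_channel,
                               unsigned in_width, unsigned unit) {
  return {1u, is_nchw ? 1u : unit, is_nchw ? in_width : 1u,
          is_nchw ? unit : in_channel, is_nchw ? 0b0011u : 0b0101u};
}

int main() {
  // Example: an input with 10 features and unit = 5 (as in the fc golden tests).
  const bool layouts[] = {true, false};
  for (bool is_nchw : layouts) {
    FcDims w = make_weight_dims(is_nchw, 10u, 10u, 5u);
    FcDims b = make_bias_dims(is_nchw, 5u);
    std::printf("%s: weight %u:%u:%u:%u (mask 0x%x), bias %u:%u:%u:%u (mask 0x%x)\n",
                is_nchw ? "nchw" : "nhwc", w.batch, w.channel, w.height,
                w.width, w.eff_mask, b.batch, b.channel, b.height, b.width,
                b.eff_mask);
  }
  return 0;
}
```

Under these example values the sketch prints weight 1:1:10:5 (mask 0x3) for NCHW and 1:5:1:10 (mask 0x5) for NHWC, which matches the shapes the patched finalize() requests.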