[ Tensor ] Templatize apply member function

author jijoong.moon <jijoong.moon@samsung.com>

Fri, 28 Jul 2023 13:57:29 +0000 (22:57 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Mon, 21 Aug 2023 06:29:23 +0000 (15:29 +0900)
author jijoong.moon <jijoong.moon@samsung.com>
Fri, 28 Jul 2023 13:57:29 +0000 (22:57 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 21 Aug 2023 06:29:23 +0000 (15:29 +0900)
diff --git a/Applications/Custom/mae_loss.cpp b/Applications/Custom/mae_loss.cpp

index 40a39e6cd8c253f167121634a76d2d67541fdf13..092f762cf6012152fec8dcba4e64c26dfeaddee4 100644 (file)
--- a/Applications/Custom/mae_loss.cpp
+++ b/Applications/Custom/mae_loss.cpp
@@ -43,7 +43,7 @@ void MaeLossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
    unsigned int size = predicted.size();
    float deriv_val = 1.0f / (float)size;
  
-  deriv.apply_i([deriv_val](float x) {
+  deriv.apply_i<float>([deriv_val](float x) {
      if (fabs(x) < EPSILON_) {
        return 0.0f;
      }
diff --git a/Applications/ProductRatings/jni/main.cpp b/Applications/ProductRatings/jni/main.cpp

index 7a5d888d448a9d8a226a4da7c9a7bfa1895f1f62..029608889ad69f748e32dea1de9a51a35e227cd1 100644 (file)
--- a/Applications/ProductRatings/jni/main.cpp
+++ b/Applications/ProductRatings/jni/main.cpp
@@ -248,7 +248,7 @@ int main(int argc, char *argv[]) {
        try {
          float answer =
            NN.inference({MAKE_SHARED_TENSOR(nntrainer::Tensor({o}, nntrainer::TensorDim::TensorType()))})[0]
-            ->apply(stepFunction)
+         ->apply<float>(stepFunction)
              .getValue(0, 0, 0, 0);
  
          std::cout << answer << " : " << l[0] << std::endl;
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp

index 717e382d77ba424d322483abd6dc16cff83f15d5..450728f698c5b23497e95de503022b021165dedd 100644 (file)
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -661,6 +661,16 @@ NetworkGraph::canExecuteInPlace(const std::shared_ptr<LayerNode> &lnode) {
      return InPlace::RESTRICTING;
    }
  
+  /**
+   * if the layer's input and output type is not FP32, then it cannot be
+   * inplace. We assume that the input is always FP32.
+   */
+  if (lnode->getInputConnections().empty()) {
+    if (!istrequal(getTensorType()[3], "FP32"))
+      return InPlace::NONE;
+
+  }
+
    return InPlace::NONE;
  }
  
diff --git a/nntrainer/layers/acti_func.cpp b/nntrainer/layers/acti_func.cpp

index d8fef837786a1944e6a30467bcdb918fdeba4822..c33ff70cdd9c71f13a61deeb0e067440232a0203 100644 (file)
--- a/nntrainer/layers/acti_func.cpp
+++ b/nntrainer/layers/acti_func.cpp
@@ -217,7 +217,7 @@ Tensor &ActiFunc::softmax(Tensor const &input, Tensor &output) {
    }
  
    // take exp
-  output.apply(exp_util, output);
+  output.apply<float>(exp_util, output);
  
    // take sum over the last dimension
    Tensor sum = output.sum(3);
@@ -337,7 +337,7 @@ float ActiFunc::leakyReluPrime(float x) {
  }
  
  Tensor &ActiFunc::swish(Tensor const &t_in, Tensor &t_out) {
-  t_in.apply([&](float x) { return sigmoid(x); }, t_out);
+  t_in.apply<float>([&](float x) { return sigmoid(x); }, t_out);
    t_out.multiply_i(t_in);
  
    return t_out;
@@ -350,8 +350,8 @@ Tensor &ActiFunc::swishPrime(Tensor const &t_in, Tensor const &t_out,
      outgoing_derivative = Tensor(t_out.getDim());
  
    Tensor tmp = Tensor(t_out.getDim());
-  t_in.apply([&](float x) { return sigmoid(x); }, outgoing_derivative);
-  t_out.apply([&](float x) { return 1 - x; }, tmp);
+  t_in.apply<float>([&](float x) { return sigmoid(x); }, outgoing_derivative);
+  t_out.apply<float>([&](float x) { return 1 - x; }, tmp);
    outgoing_derivative.multiply_i(tmp);
    outgoing_derivative.add_i(t_out);
  
@@ -362,7 +362,7 @@ Tensor &ActiFunc::swishPrime(Tensor const &t_in, Tensor const &t_out,
  
  Tensor &ActiFunc::gelu(Tensor const &t_in, Tensor &t_out) {
    float tmp = 1 / sqrt(2);
-  t_in.apply([&](float x) { return 0.5 * x * (1 + erf(x * tmp)); }, t_out);
+  t_in.apply<float>([&](float x) { return 0.5 * x * (1 + erf(x * tmp)); }, t_out);
    return t_out;
  }
  
@@ -374,7 +374,7 @@ Tensor &ActiFunc::geluPrime(Tensor const &t_in, Tensor const &t_out,
      outgoing_derivative = Tensor(t_out.getDim());
  
    float tmp = 1 / sqrt(2);
-  t_in.apply(
+  t_in.apply<float>(
      [&](float x) {
        return 0.5 * (1 + erf(x * tmp) +
                      x * ((2 / sqrt(M_PI)) * exp(-pow(x * tmp, 2))) * tmp);
diff --git a/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp b/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp

index 63abf7dc5444484e617b424f608c25425b56b527..8308003782d058f34e96fc6794be379d9285ce12 100644 (file)
--- a/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
+++ b/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
@@ -30,19 +30,19 @@ void CrossEntropySigmoidLossLayer::forwarding(RunLayerContext &context,
    Tensor &y = context.getInput(SINGLE_INOUT_IDX);
  
    // fill the output
-  hidden_ = y.apply(ActiFunc::sigmoid, hidden_);
+  hidden_ = y.apply<float>(ActiFunc::sigmoid, hidden_);
  
    if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
      Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
      // @todo: change this to apply_i
      // @note: the output should be logit before applying sigmoid
      // log(1 + exp(-abs(y))) + max(y, 0)
-    Tensor mid_term = y.apply(static_cast<float (*)(float)>(&std::fabs))
+    Tensor mid_term = y.apply<float>(static_cast<float (*)(float)>(&std::fabs))
                          .multiply(-1.0)
-                        .apply(static_cast<float (*)(float)>(&std::exp))
+                        .apply<float>(static_cast<float (*)(float)>(&std::exp))
                          .add(1.0)
-                        .apply(logFloat);
-    mid_term = mid_term.add(y.apply(ActiFunc::relu));
+                        .apply<float>(logFloat);
+    mid_term = mid_term.add(y.apply<float>(ActiFunc::relu));
  
      // y * y2
      Tensor end_term = y2.chain().multiply_i(y).run();
@@ -60,7 +60,7 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
    const Tensor &y2 = context.getIncomingDerivative(SINGLE_INOUT_IDX);
    Tensor &y = context.getInput(SINGLE_INOUT_IDX);
  
-  y.apply(ActiFunc::sigmoid, ret_derivative);
+  y.apply<float>(ActiFunc::sigmoid, ret_derivative);
    ret_derivative.subtract_i(y2);
    if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
      throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "
diff --git a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp

index 08b74f8356609c2aee095adf9f038c35c241a127..92ce9ec6f6c0c55c63992bac3dea667783ce9dcc 100644 (file)
--- a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
+++ b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -34,7 +34,7 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
  
    if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
      Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
-    l = y2.multiply(hidden_.apply(logFloat)).sum_by_batch().multiply(-1);
+    l = y2.multiply(hidden_.apply<float>(logFloat)).sum_by_batch().multiply(-1);
  
      // update the loss value
      LossLayer::updateLoss(context, l);
diff --git a/nntrainer/layers/mol_attention_layer.cpp b/nntrainer/layers/mol_attention_layer.cpp

index 40c8f319b5089da4265e355efad45aa45fd3bfd2..efacd24849c9d7cc263948e3b60d3c70b778d3cc 100644 (file)
--- a/nntrainer/layers/mol_attention_layer.cpp
+++ b/nntrainer/layers/mol_attention_layer.cpp
@@ -200,8 +200,8 @@ void MoLAttentionLayer::forwarding(RunLayerContext &context, bool training) {
    alpha_src.copy_with_stride(
      fc_proj_out.getSharedDataTensor({batch, 1, 1, mol_k}, mol_k * 2, false));
  
-  kappa_src.apply_i(&expf);
-  beta_src.apply_i(&expf);
+  kappa_src.apply_i<float>(&expf);
+  beta_src.apply_i<float>(&expf);
    Tensor kappa = kappa_src;
    Tensor beta = beta_src;
  
diff --git a/nntrainer/optimizers/adam.cpp b/nntrainer/optimizers/adam.cpp

index e31b19eeec4ebc3530e5ff556d86bbb290c4733d..b0dbd38c753edc04611bbcee759097264d867b23 100644 (file)
--- a/nntrainer/optimizers/adam.cpp
+++ b/nntrainer/optimizers/adam.cpp
@@ -86,7 +86,7 @@ void Adam::applyGradient(RunOptimizerContext &context) {
    wv.add_i(x_grad.multiply(x_grad), 1.0f - beta2);
  
    if (torch_ref) {
-    Tensor denom = wv.apply(sqrtFloat);
+    Tensor denom = wv.apply<float>(sqrtFloat);
      denom.divide_i(sqrtFloat(biasCorrection2));
      denom.add_i(epsilon);
      wm.divide(denom, x_grad);
@@ -98,7 +98,7 @@ void Adam::applyGradient(RunOptimizerContext &context) {
        return 1 / (sqrtDouble(f) + epsilon);
      };
  
-    x_grad = wv.apply(sqrtEps, x_grad);
+    x_grad = wv.apply<float>(sqrtEps, x_grad);
      x_grad.multiply_i(wm);
      context.applyGradient(getUpdatedLearningRate(context.getIteration(),
                                                   context.getLearningRate()));
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp

index 221b21ac9fea01f875bcb9817869506520aa8348..8cf7e8c332970e04e1aaace4b203f5d617b27105 100644 (file)
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -258,12 +258,12 @@ void Tensor::setRandBernoulli(float probability) {
      setDist<float, std::bernoulli_distribution>(
        std::bernoulli_distribution(probability));
    } else if (this->getDataType() == ml::train::TensorDim::DataType::FP16) {
  #ifdef ENABLE_FP16
      setDist<_FP16, std::bernoulli_distribution>(
-      std::bernoulli_distribution((_FP16)probability));
+      std::bernoulli_distribution(probability));
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
    }
  }
  
@@ -711,18 +707,18 @@ Tensor Tensor::multiply(float const &value) const {
  
  Tensor &Tensor::multiply(float const &value, Tensor &out) const {
    /// @todo add unittest
-  // if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
      auto f = std::bind(std::multiplies<float>(), std::placeholders::_1, value);
-    return apply(f, out);
-//   } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-// #ifdef ENABLE_FP16
-//     auto f = std::bind(std::multiplies<_FP16>(), std::placeholders::_1,
-//                        static_cast<_FP16>(value));
-//     return apply(f, out);
-// #else
-//     throw std::invalid_argument("Error: enable-fp16 is not enabled");
-// #endif
-  // }
+    return apply<float>(f, out);
+  } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    auto f = std::bind(std::multiplies<_FP16>(), std::placeholders::_1,
+                       static_cast<_FP16>(value));
+    return apply<_FP16>(f, out);
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
    return out;
  }
  
@@ -838,17 +834,17 @@ Tensor &Tensor::divide(float const &value, Tensor &out) const {
      throw std::invalid_argument(ss.str().c_str());
    }
  
-  // if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
      auto f = std::bind(std::divides<float>(), std::placeholders::_1, value);
-    return apply(f, out);
-//   } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-// #ifdef ENABLE_FP16
-//     auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value));
-//     return apply(f, out);
-// #else
-//     throw std::invalid_argument("Error: enable-fp16 is not enabled");
-// #endif
-//   }
+    return apply<float>(f, out);
+  } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    auto f = std::bind(std::divides<_FP16>(), std::placeholders::_1, static_cast<_FP16>(value));
+    return apply<_FP16>(f, out);
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
    return out;
  }
  
@@ -931,18 +927,18 @@ Tensor Tensor::add(float const &value) const {
  
  Tensor &Tensor::add(float const &value, Tensor &out) const {
    /// @todo add unittest
-  // if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
      auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
-    return apply(f, out);
-//   } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-// #ifdef ENABLE_FP16
-//     auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
-//                        static_cast<_FP16>(value));
-//     return apply(f, out);
-// #else
-//     throw std::invalid_argument("Error: enable-fp16 is not enabled");
-// #endif
-//   }
+    return apply<float>(f, out);
+  } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
+                       static_cast<_FP16>(value));
+    return apply<_FP16>(f, out);
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
    return out;
  }
  
@@ -1057,18 +1053,18 @@ Tensor Tensor::subtract(float const &value) const {
  
  Tensor &Tensor::subtract(float const &value, Tensor &out) const {
    /// @todo add unittest
-  // if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
      auto f = std::bind(std::minus<float>(), std::placeholders::_1, value);
-    return apply(f, out);
-//   } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-// #ifdef ENABLE_FP16
-//     auto f = std::bind(std::minus<_FP16>(), std::placeholders::_1,
-//                        static_cast<_FP16>(value));
-//     return apply(f, out);
-// #else
-//     ml_loge("%s", "Error: enable-fp16 is not enabled");
-// #endif
-//   }
+    return apply<float>(f, out);
+  } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    auto f = std::bind(std::minus<_FP16>(), std::placeholders::_1,
+                       static_cast<_FP16>(value));
+    return apply<_FP16>(f, out);
+#else
+    ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
+  }
    return out; // shouldn't reach
  }
  
@@ -1091,21 +1087,21 @@ Tensor Tensor::pow(float exponent) const {
  }
  
  Tensor &Tensor::pow(float exponent, Tensor &out) const {
-  // if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
      auto f = [exponent](float in) { return powf(in, exponent); };
-    return apply(f, out);
-  // }
-//   if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-// #ifdef ENABLE_FP16
-//     auto f = [exponent](_FP16 in) {
-//       return static_cast<_FP16>(powf(in, exponent));
-//     };
-//     return apply(f, out);
-// #else
-//     ml_loge("%s", "Error: enable-fp16 is not enabled");
-// #endif
-//   }
-  // return out;
+    return apply<float>(f, out);
+  }
+  if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    auto f = [exponent](_FP16 in) {
+      return static_cast<_FP16>(powf(in, exponent));
+    };
+    return apply<_FP16>(f, out);
+#else
+    ml_loge("%s", "Error: enable-fp16 is not enabled");
+#endif
+  }
+  return out;
  }
  
  Tensor Tensor::getBatchSlice(size_t offset, unsigned int size) const {
@@ -1301,7 +1297,7 @@ std::vector<Tensor> Tensor::split(std::vector<size_t> sizes, int axis) {
                           ret_dims[i].width(), ret_dims[i].channel()};
        }
  
-      ret_t.apply_i([&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
+      ret_t.apply_i<float>([&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
          return iter_value(loc, end_loc, reset_dim_arr);
        });
      }
@@ -1380,7 +1376,7 @@ std::vector<Tensor> Tensor::split(std::vector<size_t> sizes, int axis) {
                           ret_dims[i].width(), ret_dims[i].channel()};
        }
  
-      ret_t.apply_i([&iter_value, &loc, &end_loc, &reset_dim_arr](float _) {
+      ret_t.apply_i<_FP16>([&iter_value, &loc, &end_loc, &reset_dim_arr](_FP16 _) {
          return iter_value(loc, end_loc, reset_dim_arr);
        });
      }
@@ -3069,13 +3065,13 @@ void Tensor::setZero() {
      if (contiguous)
        sscal(size(), 0, getData<float>(), 1);
      else
-      apply_i([](float val) -> float { return 0; });
+      apply_i<float>([](float val) -> float { return 0; });
    } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
  #ifdef ENABLE_FP16
      if (contiguous)
        sscal(size(), 0, getData<_FP16>(), 1);
      else
-      apply_i([](float val) -> float { return 0; });
+      apply_i<_FP16>([](_FP16 val) -> _FP16 { return 0; });
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h

index bef60bfedd0583bb588dc1cf100c65185b618b4e..72cb53b20cbbb1229bcb9f0c93e70fdbe9328bf7 100644 (file)
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -1145,9 +1145,10 @@ public:
     * @param f function to apply
     * @return int ML_ERROR_NONE if successful
     */
-  int apply_i(std::function<float(float)> f) {
+  template <typename T = float>
+  int apply_i(std::function<T(T)> f) {
      Tensor result = *this;
-    apply(f, result);
+    apply<T>(f, result);
  
      return ML_ERROR_NONE;
    };
@@ -1157,9 +1158,10 @@ public:
     * @param[in] *function function pointer applied
     * @retval    Tensor
     */
-  Tensor apply(std::function<float(float)> f) const {
+  template <typename T = float>
+  Tensor apply(std::function<T(T)> f) const {
      Tensor result;
-    return apply(f, result);
+    return apply<T>(f, result);
    };
  
    /**
@@ -1168,8 +1170,8 @@ public:
     * @param[out] output output tensor
     * @retval    Tensor
     */
-  
-  Tensor &apply(std::function<float(float)> f, Tensor &output) const {
+  template <typename T = float>
+  Tensor &apply(std::function<T(T)> f, Tensor &output) const {
      CREATE_IF_EMPTY_DIMS(output, dim, nullptr);
  
      if (dim != output.dim) {
@@ -1178,73 +1180,34 @@ public:
          "[Tensor::apply] output dimension does not match");
      }
  
-    if (dim.getDataType() == Tdatatype::FP32) {
-      if (contiguous && output.contiguous) {
-        const float *data = (getData<float>());
-        float *rdata = (output.getData<float>());
-
-        std::transform(data, data + size(), rdata, f);
-      } else if (strides[3] == 1 && output.strides[3] == 1) {
-        /** @todo optimize this with combining these loops where stride is 1 */
-        for (unsigned int b = 0; b < batch(); ++b) {
-          for (unsigned int c = 0; c < channel(); ++c) {
-            for (unsigned int h = 0; h < height(); ++h) {
-              float *out_data = output.getAddress<float>(b, c, h, 0);
-              const float *in_data = getAddress<float>(b, c, h, 0);
-              std::transform(in_data, in_data + width(), out_data, f);
-            }
+    if (contiguous && output.contiguous) {
+      const T *data = (getData<T>());
+      T *rdata = (output.getData<T>());
+
+      std::transform(data, data + size(), rdata, f);
+    } else if (strides[3] == 1 && output.strides[3] == 1) {
+      /** @todo optimize this with combining these loops where stride is 1 */
+      for (unsigned int b = 0; b < batch(); ++b) {
+        for (unsigned int c = 0; c < channel(); ++c) {
+          for (unsigned int h = 0; h < height(); ++h) {
+            T *out_data = output.getAddress<T>(b, c, h, 0);
+            const T *in_data = getAddress<T>(b, c, h, 0);
+            std::transform(in_data, in_data + width(), out_data, f);
            }
          }
-      } else {
-        for (unsigned int b = 0; b < batch(); ++b) {
-          for (unsigned int c = 0; c < channel(); ++c) {
-            for (unsigned int h = 0; h < height(); ++h) {
-              for (unsigned int w = 0; w < width(); ++w) {
-                output.setValue(b, c, h, w, f(getValue<float>(b, c, h, w)));
-              }
+      }
+    } else {
+      for (unsigned int b = 0; b < batch(); ++b) {
+        for (unsigned int c = 0; c < channel(); ++c) {
+          for (unsigned int h = 0; h < height(); ++h) {
+            for (unsigned int w = 0; w < width(); ++w) {
+              output.setValue(b, c, h, w, f(getValue<T>(b, c, h, w)));
              }
            }
          }
        }
-    } else if (dim.getDataType() == Tdatatype::FP16) {
-
-      auto f_16 = [f](_FP16 x) -> _FP16 {
-        return static_cast<_FP16>(f(static_cast<float>(x)));
-      };
+    }
  
-      // std::function<_FP16(_FP16)> f_16 =
-      //   static_cast<std::function<_FP16(_FP16)>>(f);
-
-
-    
-      if (contiguous && output.contiguous) {
-        const _FP16 *data = (getData<_FP16>());
-        _FP16 *rdata = (output.getData<_FP16>());
-
-        std::transform(data, data + size(), rdata, f_16);
-      } else if (strides[3] == 1 && output.strides[3] == 1) {
-        /** @todo optimize this with combining these loops where stride is 1 */
-        for (unsigned int b = 0; b < batch(); ++b) {
-          for (unsigned int c = 0; c < channel(); ++c) {
-            for (unsigned int h = 0; h < height(); ++h) {
-              _FP16 *out_data = output.getAddress<_FP16>(b, c, h, 0);
-              const _FP16 *in_data = getAddress<_FP16>(b, c, h, 0);
-              std::transform(in_data, in_data + width(), out_data, f_16);
-            }
-          }
-        }
-      } else {
-        for (unsigned int b = 0; b < batch(); ++b) {
-          for (unsigned int c = 0; c < channel(); ++c) {
-            for (unsigned int h = 0; h < height(); ++h) {
-              for (unsigned int w = 0; w < width(); ++w) {
-                output.setValue(b, c, h, w, f_16(getValue<_FP16>(b, c, h, w)));
-              }
-            }
-          }
-        }
-      }
-    }
      return output;
    };
  
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h

index 988f7de35f9397a0baa75dd70e23063d34f49f66..41fd7eaa7729bf227214a6c2ea77d0eaf21d2a9f 100644 (file)
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -252,7 +252,9 @@ public:
    /**
     * @brief     Apply the gradient to the weight
     */
-  void applyGradient(double lr) { var->add_i(*grad.get(), -lr); }
+  void applyGradient(double lr) {
+    var->add_i(*grad.get(), -lr);
+  }
  
    /**
     * @brief Check if the gradient is supposed to be clipped by global norm with
diff --git a/nntrainer/utils/util_func.cpp b/nntrainer/utils/util_func.cpp

index da9f6c86582520e1b0ce41230f6f0944cb186e24..d42764c2bff1391040a8cb3b012eaec240243553 100644 (file)
--- a/nntrainer/utils/util_func.cpp
+++ b/nntrainer/utils/util_func.cpp
@@ -40,7 +40,6 @@ float sqrtFloat(float x) { return sqrt(x); };
  double sqrtDouble(double x) { return sqrt(x); };
  
  float logFloat(float x) { return log(x + 1.0e-20); }
-1103
  
  float exp_util(float x) { return exp(x); }
  
diff --git a/test/unittest/unittest_nntrainer_activations.cpp b/test/unittest/unittest_nntrainer_activations.cpp

index fcbdc088a86b21a61d0a9a6013dce1d9e0fc4937..8e1b13f7157e62f961a2c3bfaa1aea6831c57eeb 100644 (file)
--- a/test/unittest/unittest_nntrainer_activations.cpp
+++ b/test/unittest/unittest_nntrainer_activations.cpp
@@ -136,7 +136,7 @@ TEST(nntrainer_activation, sigmoid_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
-  nntrainer::Tensor Results = input.apply(nntrainer::ActiFunc::sigmoid);
+  nntrainer::Tensor Results = input.apply<float>(nntrainer::ActiFunc::sigmoid);
  
    float *data = Results.getData();
    ASSERT_NE(nullptr, data);
@@ -167,12 +167,12 @@ TEST(nntrainer_activation, DISABLED_sigmoidPrime_01_p) {
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
    nntrainer::Tensor sigmoid_result =
-    input.apply(nntrainer::ActiFunc::sigmoid);
+    input.apply<float>(nntrainer::ActiFunc::sigmoid);
    float *data = sigmoid_result.getData();
    ASSERT_NE(nullptr, data);
  
    nntrainer::Tensor prime_result =
-    sigmoid_result.apply(nntrainer::ActiFunc::sigmoidPrime);
+    sigmoid_result.apply<float>(nntrainer::ActiFunc::sigmoidPrime);
    data = prime_result.getData();
    ASSERT_NE(nullptr, data);
  
@@ -199,7 +199,7 @@ TEST(nntrainer_activation, tanhFloat_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
-  nntrainer::Tensor Results = input.apply(nntrainer::ActiFunc::tanhFloat);
+  nntrainer::Tensor Results = input.apply<float>(nntrainer::ActiFunc::tanhFloat);
  
    float *data = Results.getData();
    ASSERT_NE(nullptr, data);
@@ -226,12 +226,12 @@ TEST(nntrainer_activation, DISABLED_tanhFloatPrime_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
-  nntrainer::Tensor tanh_result = input.apply(nntrainer::ActiFunc::tanhFloat);
+  nntrainer::Tensor tanh_result = input.apply<float>(nntrainer::ActiFunc::tanhFloat);
    float *data = tanh_result.getData();
    ASSERT_NE(nullptr, data);
  
    nntrainer::Tensor prime_result =
-    tanh_result.apply(nntrainer::ActiFunc::tanhPrime);
+    tanh_result.apply<float>(nntrainer::ActiFunc::tanhPrime);
    data = prime_result.getData();
    ASSERT_NE(nullptr, data);
  
@@ -252,7 +252,7 @@ TEST(nntrainer_activation, relu_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
-  nntrainer::Tensor Results = input.apply(nntrainer::ActiFunc::relu);
+  nntrainer::Tensor Results = input.apply<float>(nntrainer::ActiFunc::relu);
  
    float *data = Results.getData();
    ASSERT_NE(nullptr, data);
@@ -275,12 +275,12 @@ TEST(nntrainer_activation, reluPrime_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, (l - 4) * 0.1 * (i + 1));
  
-  nntrainer::Tensor relu_result = input.apply(nntrainer::ActiFunc::relu);
+  nntrainer::Tensor relu_result = input.apply<float>(nntrainer::ActiFunc::relu);
    float *data = relu_result.getData();
    ASSERT_NE(nullptr, data);
  
    nntrainer::Tensor prime_result =
-    relu_result.apply(nntrainer::ActiFunc::reluPrime);
+    relu_result.apply<float>(nntrainer::ActiFunc::reluPrime);
    data = prime_result.getData();
    ASSERT_NE(nullptr, data);
  
diff --git a/test/unittest/unittest_nntrainer_lazy_tensor.cpp b/test/unittest/unittest_nntrainer_lazy_tensor.cpp

index b6db8fb998b907f831574240cba29e8983352706..81ff486c86a2d6a5b8579dc3f3905096d7dda8ac 100644 (file)
--- a/test/unittest/unittest_nntrainer_lazy_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_lazy_tensor.cpp
@@ -36,7 +36,7 @@ protected:
     */
    nntrainer::Tensor constant_(float value) {
      nntrainer::Tensor t(batch, channel, height, width);
-    return t.apply([value](float) { return value; });
+    return t.apply<float>([value](float) { return value; });
    }
  
    nntrainer::Tensor target;
diff --git a/test/unittest/unittest_util_func.cpp b/test/unittest/unittest_util_func.cpp

index 9291fcac9f1a2ac30801f23938d5421454287b4b..29c43fea04de955c1a53d0aadcfe4a2b6ac7cb00 100644 (file)
--- a/test/unittest/unittest_util_func.cpp
+++ b/test/unittest/unittest_util_func.cpp
@@ -44,7 +44,7 @@ TEST(nntrainer_util_func, logFloat_01_p) {
    nntrainer::Tensor input(batch, channel, height, width);
    GEN_TEST_INPUT(input, i * (width) + k + 1);
  
-  nntrainer::Tensor Results = input.apply(nntrainer::logFloat);
+  nntrainer::Tensor Results = input.apply<float>(nntrainer::logFloat);
  
    float *data = Results.getData();
    ASSERT_NE(nullptr, data);
author	jijoong.moon <jijoong.moon@samsung.com>
	Fri, 28 Jul 2023 13:57:29 +0000 (22:57 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Mon, 21 Aug 2023 06:29:23 +0000 (15:29 +0900)
Applications/Custom/mae_loss.cpp		patch \| blob \| history
Applications/ProductRatings/jni/main.cpp		patch \| blob \| history
nntrainer/graph/network_graph.cpp		patch \| blob \| history
nntrainer/layers/acti_func.cpp		patch \| blob \| history
nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp		patch \| blob \| history
nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp		patch \| blob \| history
nntrainer/layers/mol_attention_layer.cpp		patch \| blob \| history
nntrainer/optimizers/adam.cpp		patch \| blob \| history
nntrainer/tensor/tensor.cpp		patch \| blob \| history
nntrainer/tensor/tensor.h		patch \| blob \| history
nntrainer/tensor/weight.h		patch \| blob \| history
nntrainer/utils/util_func.cpp		patch \| blob \| history
test/unittest/unittest_nntrainer_activations.cpp		patch \| blob \| history
test/unittest/unittest_nntrainer_lazy_tensor.cpp		patch \| blob \| history
test/unittest/unittest_util_func.cpp		patch \| blob \| history