[Layer] Remove Tensor setDataType() usage
author Donghyeon Jeong <dhyeon.jeong@samsung.com>
Thu, 7 Mar 2024 05:43:48 +0000 (14:43 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 15 Mar 2024 01:22:32 +0000 (10:22 +0900)
In several layers, there are attempts to change the data type of a Tensor object after it has been initialized.
This is currently possible but can cause issues down the line (e.g., a FloatTensor object being treated as a HalfTensor).
As such, the setDataType() method will be removed and should not be used in future updates.
Instead, the desired data type must be provided when creating a new tensor.
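For example (taken from the cross_entropy_softmax_loss_layer.cpp change
below), instead of mutating the type after construction:

  Tensor ret;
  ret.setDataType(y.getDataType());

the tensor is now created with the desired format and data type up front:

  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());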

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
nntrainer/layers/layer_normalization_layer.cpp
nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
nntrainer/layers/lstm.cpp
nntrainer/layers/lstmcell.cpp
nntrainer/layers/lstmcell_core.cpp
nntrainer/layers/multi_head_attention_layer.cpp

index 2582408800de5f9c05693ab9d536a6a698b8c3c0..466ca93bb72f3a787ef7a65e7e8fb2cfedfdb593 100644 (file)
@@ -227,7 +227,6 @@ void LayerNormalizationLayer::incremental_forwarding(RunLayerContext &context,
   deviation.multiply(inv_std_dev, output);
   output.multiply_i(gamma);
   output.add_i(beta);
-
 }
 
 void LayerNormalizationLayer::calcDerivative(RunLayerContext &context) {
@@ -236,8 +235,8 @@ void LayerNormalizationLayer::calcDerivative(RunLayerContext &context) {
   TensorDim::TensorType weight_tensor_type =
     context.getWeight(wt_idx[LNParams::gamma]).getTensorType();
 
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_tensor_type.format, weight_tensor_type.data_type);
 
   Tensor &outgoing_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
   const Tensor &incoming_derivative =
index 1990d160a60b628ac9b79499fabb12c2ee5c40dd..53854662ae7981d5f004f595c8def17f3fbdd264 100644 (file)
@@ -68,9 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   auto dataType = y.getDataType();
-
-  Tensor ret;
-  ret.setDataType(dataType);
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
index b92e31328749deb0ba0ec446f1fe679251f79f78..faee1eb400595d862b6c7c131f42bd64baa02aa3 100644 (file)
@@ -69,8 +69,9 @@ void LSTMLayer::forwardingBatchFirstLSTM(
     for (unsigned int t = 0; t < max_timestep; ++t) {
       Tensor input = input_sample.getSharedDataTensor(
         input_tensor_dim, (reverse ? max_timestep - 1 - t : t) * feature_size);
-      Tensor prev_hidden_state;
-      prev_hidden_state.setTensorType(tensor_type);
+
+      Tensor prev_hidden_state = Tensor(
+        "prev_hidden_state", weight_ih.getFormat(), weight_ih.getDataType());
 
       if (!t) {
         prev_hidden_state = Tensor(unit, tensor_type);
@@ -285,8 +286,9 @@ void LSTMLayer::calcGradientBatchFirstLSTM(
 
           // Temporary variable for d_prev_hidden_state. d_prev_hidden_state
          // already has precalculated values from incoming derivatives
-          Tensor d_prev_hidden_state_temp;
-          d_prev_hidden_state_temp.setTensorType(tensor_type);
+          Tensor d_prev_hidden_state_temp =
+            Tensor("d_prev_hidden_state_temp", tensor_type.format,
+                   tensor_type.data_type);
 
           calcGradientLSTM(
             1, unit, disable_bias, integrate_bias, acti_func,
@@ -383,8 +385,9 @@ void LSTMLayer::calcGradientBatchFirstLSTM(
 
         // Temporary variable for d_prev_hidden_state. d_prev_hidden_state
        // already has precalculated values from incoming derivatives
-        Tensor d_prev_hidden_state_temp;
-        d_prev_cell_state.setTensorType(tensor_type);
+        Tensor d_prev_hidden_state_temp =
+          Tensor("d_prev_hidden_state_temp", tensor_type.format,
+                 tensor_type.data_type);
 
         calcGradientLSTM(1, unit, disable_bias, integrate_bias, acti_func,
                          recurrent_acti_func, input, prev_hidden_state,
@@ -652,9 +655,8 @@ void LSTMLayer::forwarding(RunLayerContext &context, bool training) {
   const Tensor &weight_ih = context.getWeight(wt_idx[LSTMParams::weight_ih]);
   const Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);
 
-  TensorDim::TensorType weight_tensor_type = weight_ih.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_ih.getFormat(), weight_ih.getDataType());
 
   const Tensor &bias_h = !disable_bias && integrate_bias
                            ? context.getWeight(wt_idx[LSTMParams::bias_h])
@@ -836,9 +838,8 @@ void LSTMLayer::calcGradient(RunLayerContext &context) {
   const Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);
   Tensor &d_weight_hh = context.getWeightGrad(wt_idx[LSTMParams::weight_hh]);
 
-  TensorDim::TensorType weight_tensor_type = weight_hh.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_hh.getFormat(), weight_hh.getDataType());
 
   Tensor &d_bias_h = !disable_bias && integrate_bias
                        ? context.getWeightGrad(wt_idx[LSTMParams::bias_h])
index 28caa54af6bf42288297c075fb08a41f7bddc3d2..4a578e7d8a3a0b20fdcaa4dac368a4c3b34faec8 100644 (file)
@@ -213,9 +213,8 @@ void LSTMCellLayer::forwarding(RunLayerContext &context, bool training) {
   const Tensor &weight_hh =
     context.getWeight(wt_idx[LSTMCellParams::weight_hh]);
 
-  TensorDim::TensorType weight_tensor_type = weight_ih.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_ih.getFormat(), weight_ih.getDataType());
 
   const Tensor &bias_h = !disable_bias && integrate_bias
                            ? context.getWeight(wt_idx[LSTMCellParams::bias_h])
@@ -286,9 +285,8 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
   Tensor &d_weight_hh =
     context.getWeightGrad(wt_idx[LSTMCellParams::weight_hh]);
 
-  TensorDim::TensorType weight_tensor_type = weight_hh.getTensorType();
-  Tensor empty;
-  empty.setTensorType(weight_tensor_type);
+  Tensor empty =
+    Tensor("empty", weight_hh.getFormat(), weight_hh.getDataType());
 
   Tensor &d_bias_h = !disable_bias && integrate_bias
                        ? context.getWeightGrad(wt_idx[LSTMCellParams::bias_h])
@@ -324,8 +322,8 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
     }
   }
 
-  Tensor d_hidden_state_masked;
-  d_hidden_state_masked.setTensorType(weight_tensor_type);
+  Tensor d_hidden_state_masked = Tensor(
+    "d_hidden_state_masked", weight_hh.getFormat(), weight_hh.getDataType());
 
   if (dropout_rate > epsilon) {
     Tensor &dropout_mask =
index 7fbc0336d17710166124b0d1deb4b2d611d90042..d73a6a726daecaf205fa232fd02603543b2efc3d 100644 (file)
@@ -114,8 +114,8 @@ void LSTMCore::calcGradientLSTM(
   Tensor d_output_gate = d_ifgo.getSharedDataTensor(
     {batch_size, 1, 1, unit, tensor_type}, unit * 3, false);
 
-  Tensor activated_cell_state;
-  activated_cell_state.setTensorType(cell_state.getTensorType());
+  Tensor activated_cell_state = Tensor(
+    "activated_cell_state", cell_state.getFormat(), cell_state.getDataType());
 
   acti_func.run_fn(cell_state, activated_cell_state);
   d_hidden_state.multiply_strided(activated_cell_state, d_output_gate);
index fb5d7bd7111f05bf68c4a9b45f4d8acc9fca0bf3..622459a41bbad0271174400a3d4a6747c51659ae 100644 (file)
@@ -500,8 +500,8 @@ void MultiHeadAttentionLayer::forwarding(RunLayerContext &context,
 
   for (unsigned int i = 0; i < mask_dim_height; ++i) {
     for (unsigned int j = i + 1; j < mask_dim_width; ++j) {
-      causal_mask.setValue(
-        0, 0, i, j, _MASK_NUM(attention_weight.getDataType()));
+      causal_mask.setValue(0, 0, i, j,
+                           _MASK_NUM(attention_weight.getDataType()));
     }
   }
 
@@ -629,9 +629,8 @@ void MultiHeadAttentionLayer::initial_incremental_forwarding(
   Tensor &key = context.getInput(INOUT_INDEX::KEY);
   Tensor &value = context.getInput(INOUT_INDEX::VALUE);
 
-  Tensor empty_tensor;
-
-  empty_tensor.setTensorType(value.getTensorType());
+  Tensor empty_tensor =
+    Tensor("empty_tensor", value.getFormat(), value.getDataType());
 
   Tensor &mask =
     provide_attention_mask ? context.getInput(INOUT_INDEX::MASK) : empty_tensor;
@@ -909,9 +908,8 @@ void MultiHeadAttentionLayer::incremental_forwarding(RunLayerContext &context,
   Tensor &key = context.getInput(INOUT_INDEX::KEY);
   Tensor &value = context.getInput(INOUT_INDEX::VALUE);
 
-  Tensor empty_tensor;
-
-  empty_tensor.setTensorType(value.getTensorType());
+  Tensor empty_tensor =
+    Tensor("empty_tensor", value.getFormat(), value.getDataType());
 
   Tensor &mask =
     provide_attention_mask ? context.getInput(INOUT_INDEX::MASK) : empty_tensor;