Moved addition_cl kernel to Tensor directory.
Refactored addition_cl for generalization.
Signed-off-by: Yash Singh <yash.singh@samsung.com>
*/
#include <addition_layer_cl.h>
-#include <blas_kernels.h>
+#include <blas_kernel_interface.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
if (!idx) {
hidden_.copy(input_);
} else {
- AddProcess(input_, hidden_, context);
+ add_i_cl(input_, hidden_, context);
}
}
}
-void AdditionLayerCL::AddProcess(Tensor const &input, Tensor &result,
- RunLayerContext &context) {
-
- CREATE_IF_EMPTY_DIMS(result, result.getDim());
-
- NNTR_THROW_IF(result.getData() == nullptr, std::invalid_argument)
- << result.getName() << " is not allocated";
- NNTR_THROW_IF(input.getData() == nullptr, std::invalid_argument)
- << input.getName() << " is not allocated";
-
- if (input.getDim() != result.getDim()) {
- throw std::invalid_argument(
- "Error: Dimensions does not match for addition");
- }
-
- if (input.getDataType() == ml::train::TensorDim::DataType::FP32) {
- unsigned int size = input.size();
- const float *data = input.getData();
- float *rdata = result.getData();
-
- addition_cl(data, rdata, size, context);
-
- } else if (input.getDataType() == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
- unsigned int size = input.size();
- const _FP16 *data = input.getData<_FP16>();
- _FP16 *rdata = result.getData<_FP16>();
-
- addition_cl(data, rdata, size, context);
-
-#else
- throw std::invalid_argument("Error: enable-fp16 is not enabled");
-#endif
- }
-}
-
void AdditionLayerCL::incremental_forwarding(RunLayerContext &context,
unsigned int from, unsigned int to,
bool training) {
if (!idx) {
hidden_step.copy(input_step);
} else {
- AddProcess(input_step, hidden_step, context);
+ add_i_cl(input_step, hidden_step, context);
}
}
}
*/
void calcDerivative(RunLayerContext &context) override;
- /**
- * @brief Process data and dimensions for add operation used in addition layer
- * @param[in] input Tensor
- * @param[in] result Tensor
- * @param[in] RunLayerContext reference
- */
- void AddProcess(Tensor const &input, Tensor &result,
- RunLayerContext &context);
-
/**
* @copydoc bool supportBackwarding() const
*/
}
}
+void add_i_cl(Tensor const &input, Tensor &result,
+              RunLayerContext &context) {
+
+ CREATE_IF_EMPTY_DIMS(result, result.getDim());
+
+ NNTR_THROW_IF(result.getData() == nullptr, std::invalid_argument)
+ << result.getName() << " is not allocated";
+ NNTR_THROW_IF(input.getData() == nullptr, std::invalid_argument)
+ << input.getName() << " is not allocated";
+
+  if (input.getDim() != result.getDim()) {
+    throw std::invalid_argument(
+      "Error: Dimensions do not match for addition");
+  }
+
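+  // Dispatch on the input tensor's data type; each branch calls the
+  // matching typed overload of the addition_cl kernel.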
+ if (input.getDataType() == ml::train::TensorDim::DataType::FP32) {
+ unsigned int size = input.size();
+ const float *data = input.getData();
+ float *rdata = result.getData();
+
+ addition_cl(data, rdata, size, context);
+
+ } else if (input.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
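+    // The _FP16 path is compiled only when the enable-fp16 build option
+    // is set; otherwise the else branch below reports it at runtime.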
+ unsigned int size = input.size();
+ const _FP16 *data = input.getData<_FP16>();
+ _FP16 *rdata = result.getData<_FP16>();
+
+ addition_cl(data, rdata, size, context);
+
+#else
+ throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+ }
+}
+
} // namespace nntrainer
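For context, below is a minimal sketch of the calling pattern this refactor enables, mirroring AdditionLayerCL::forwarding above. The helper name sum_inputs and the layer_context.h/tensor.h include paths are illustrative assumptions, not part of the patch:

#include <vector>

#include <blas_kernel_interface.h> // add_i_cl
#include <layer_context.h>         // RunLayerContext (assumed path)
#include <tensor.h>                // nntrainer::Tensor (assumed path)

using namespace nntrainer;

// Hypothetical helper: seed the output with the first input, then
// accumulate the remaining inputs in place on the OpenCL device.
void sum_inputs(std::vector<Tensor> const &inputs, Tensor &hidden,
                RunLayerContext &context) {
  hidden.copy(inputs[0]); // hidden = inputs[0]
  for (size_t i = 1; i < inputs.size(); ++i)
    add_i_cl(inputs[i], hidden, context); // hidden += inputs[i]
}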
*/
void multiplyCl(Tensor &input, float const &value, RunLayerContext &context);
+/**
+ * @brief In-place add operation on the OpenCL device: result += input
+ * @param[in] input Tensor to be added
+ * @param[in,out] result Tensor that accumulates the sum
+ * @param[in] context RunLayerContext reference
+ */
+void add_i_cl(Tensor const &input, Tensor &result, RunLayerContext &context);
+
} // namespace nntrainer
#endif /* __BLAS_KERNEL_INTERFACE_H__ */
"added_w16a16.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT, "nchw",
"fp16", "fp16");
-GTEST_PARAMETER_TEST(Addition16, LayerGoldenTest,
+GTEST_PARAMETER_TEST(AdditionGPU16, LayerGoldenTest,
::testing::Values(addition_w16a16_gpu));
#endif