Added addition kernel to enhance reusability of the common blas kernels.
Used AdditionLayer interface for both CPU and GPU calls.
Signed-off-by: yash.singh <yash.singh@samsung.com>
[GPU/OpenCL] Initial version of Addition Layer with OpenCL ops
Added naive version of OpenCL implementation for Addition Layer.
Incorporated kernel for ops used.
Added unit test for addition_layer_cl.
Signed-off-by: yash.singh <yash.singh@samsung.com>
[GPU/OpenCL] Addition Kernel added in reusable blas OpenCL kernels
Added addition kernel to enhance reusability of the common blas kernels.
Used AdditionLayer interface for both CPU and GPU calls.
Signed-off-by: yash.singh <yash.singh@samsung.com>
/**
* @brief Helper function to create addition layer
*/
-inline std::unique_ptr<Layer>
-Addition(const std::vector<std::string> &properties = {}) {
- return createLayer(LayerType::LAYER_ADDITION, properties);
-}
-
-#ifdef ENABLE_OPENCL
-/**
- * @brief Helper function to create Addition layer for GPU
- */
-inline std::unique_ptr<Layer>
-AdditionCL(const std::vector<std::string> &properties = {},
- const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
+inline std::unique_ptr<Layer> Addition(
+ const std::vector<std::string> &properties = {},
+ const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
return createLayer(LayerType::LAYER_ADDITION, properties, compute_engine);
}
-#endif
/**
* @brief Helper function to create concat layer
* Copyright (C) 2024 Yash Singh <yash.singh@samsung.com>
*
* @file addition_layer_cl.cpp
- * @date 17 May 2024
+ * @date 28 May 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Yash Singh <yash.singh@samsung.com>
* @bug No known bugs except for NYI items
* implementation
*/
#include <addition_layer_cl.h>
+#include <blas_kernels.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <layer_context.h>
-std::string addition_cl_kernel_ =
- R"(__kernel void addition_cl(__global const float* input, __global float* output, const unsigned int size) {
- #pragma printf_support
- size_t idx = get_global_id(0);
- if (idx < size) {
- output[idx] = output[idx] + input[idx];
- }
-})";
-
namespace nntrainer {
static constexpr size_t SINGLE_INOUT_IDX = 0;
if (!idx) {
hidden_.copy(input_);
} else {
- // hidden_.add_i(input_);
AddProcess(input_, hidden_, context);
}
}
}
-/**
- * @brief declaring static kerinputnel objects
- *
- */
-opencl::Kernel AdditionLayerCL::kernel_addition;
-
void AdditionLayerCL::AddProcess(Tensor const &input, Tensor &result,
RunLayerContext &context) {
throw std::invalid_argument("Error: OpenCL fp16 is not supported yet.");
}
-void AdditionLayerCL::addition_cl(const float *input, float *res,
- unsigned int size, RunLayerContext &context) {
-
- bool result = false;
- do {
- result = result =
- context.clCreateKernel(addition_cl_kernel_, context.LayerKernel::ADD,
- AdditionLayerCL::kernel_addition);
- if (!result) {
- break;
- }
-
- size_t dim1_size = sizeof(float) * size;
- opencl::Buffer inputA(context.context_inst_, dim1_size, true, nullptr);
-
- opencl::Buffer inOutRes(context.context_inst_, dim1_size, true, nullptr);
-
- result = inputA.WriteData(context.command_queue_inst_, input);
- if (!result) {
- break;
- }
-
- result = inOutRes.WriteData(context.command_queue_inst_, res);
- if (!result) {
- break;
- }
-
- result = AdditionLayerCL::kernel_addition.SetKernelArguments(
- 0, &inputA, sizeof(cl_mem));
- if (!result) {
- break;
- }
-
- result = AdditionLayerCL::kernel_addition.SetKernelArguments(
- 1, &inOutRes, sizeof(cl_mem));
- if (!result) {
- break;
- }
-
- result = AdditionLayerCL::kernel_addition.SetKernelArguments(2, &size,
- sizeof(int));
- if (!result) {
- break;
- }
-
- const int work_groups_count[3] = {(int)size, 1, 1};
- const int work_group_size[3] = {32, 32, 1}; // test-value
- result = context.command_queue_inst_.DispatchCommand(
- AdditionLayerCL::kernel_addition, work_groups_count, work_group_size);
- if (!result) {
- break;
- }
-
- result = inOutRes.ReadData(context.command_queue_inst_, res);
- if (!result) {
- break;
- }
-
- } while (false);
-}
-
void AdditionLayerCL::incremental_forwarding(RunLayerContext &context,
unsigned int from, unsigned int to,
bool training) {
if (!idx) {
hidden_step.copy(input_step);
} else {
- // hidden_step.add_i(input_step);
AddProcess(input_step, hidden_step, context);
}
}
* Copyright (C) 2024 Yash Singh <yash.singh@samsung.com>
*
* @file addition_layer_cl.h
- * @date 17 May 2024
+ * @date 28 May 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Yash Singh <yash.singh@samsung.com>
* @bug No known bugs except for NYI items
#include <common_properties.h>
#include <layer_devel.h>
-#include <opencl_buffer.h>
-#include <opencl_kernel.h>
#define CREATE_IF_EMPTY_DIMS(tensor, ...) \
do { \
*/
void calcDerivative(RunLayerContext &context) override;
- /**
- * @brief declaring static kernel objects
- */
- static opencl::Kernel kernel_addition;
-
/**
* @brief Process data and dimensions for add operation used in addition layer
* @param[in] input Tensor
void AddProcess(Tensor const &input, Tensor &result,
RunLayerContext &context);
- /**
- * @brief addition : sum of all input vectors
- * @param[in] input float * for input
- * @param[in] res float * for result/output
- * @param[in] size number of elements in input vector
- * @param[in] context RunLayerContext reference
- */
- void addition_cl(const float *input, float *res, unsigned int size,
- RunLayerContext &context);
-
/**
* @copydoc bool supportBackwarding() const
*/
C[m * ldc + n] = c;
})";
+// OpenCL kernel source: element-wise in-place addition, output[i] += input[i].
+// NOTE: the stray "#pragma printf_support" was dropped — it is a non-standard
+// pragma and the kernel contains no printf call.
+std::string addition_cl_kernel_ =
+  R"(__kernel void addition_cl(__global const float* input, __global float* output, const unsigned int size) {
+  size_t idx = get_global_id(0);
+  if (idx < size) {
+    output[idx] = output[idx] + input[idx];
+  }
+  })";
+
/**
* @brief defining global kernel objects
*/
opencl::Kernel kernel_sgemv;
opencl::Kernel kernel_sgemm;
opencl::Kernel kernel_dot;
+opencl::Kernel kernel_addition;
void sgemv_cl(const float *matAdata, const float *vecXdata, float *vecYdata,
unsigned int dim1, unsigned int dim2, unsigned int lda,
} while (false);
}
+/**
+ * @brief In-place element-wise addition on the GPU: res[i] += input[i].
+ *
+ * Creates (or reuses) the addition kernel, copies both host buffers to the
+ * device, dispatches the kernel and reads the accumulated result back into
+ * res. On any OpenCL failure the function returns early, leaving res
+ * unmodified from the last successful step.
+ *
+ * @param[in]     input   host pointer to the addend vector
+ * @param[in,out] res     host pointer to the accumulator vector
+ * @param[in]     size    number of float elements in each vector
+ * @param[in]     context RunLayerContext supplying the OpenCL context/queue
+ */
+void addition_cl(const float *input, float *res, unsigned int size,
+                 RunLayerContext &context) {
+
+  bool result = false;
+
+  do {
+    result =
+      context.clCreateKernel(addition_cl_kernel_, context.LayerKernel::ADD,
+                             kernel_addition);
+    if (!result) {
+      break;
+    }
+
+    size_t dim1_size = sizeof(float) * size;
+    opencl::Buffer inputA(context.context_inst_, dim1_size, true, nullptr);
+
+    opencl::Buffer inOutRes(context.context_inst_, dim1_size, true, nullptr);
+
+    result = inputA.WriteData(context.command_queue_inst_, input);
+    if (!result) {
+      break;
+    }
+
+    result = inOutRes.WriteData(context.command_queue_inst_, res);
+    if (!result) {
+      break;
+    }
+
+    result = kernel_addition.SetKernelArguments(0, &inputA, sizeof(cl_mem));
+    if (!result) {
+      break;
+    }
+
+    result = kernel_addition.SetKernelArguments(1, &inOutRes, sizeof(cl_mem));
+    if (!result) {
+      break;
+    }
+
+    result = kernel_addition.SetKernelArguments(2, &size, sizeof(int));
+    if (!result) {
+      break;
+    }
+
+    const int work_groups_count[3] = {(int)size, 1, 1};
+    // NOTE(review): 32x32 local size for a 1-D dispatch looks like a
+    // placeholder — confirm against device limits. Kept as-is ("test-value").
+    const int work_group_size[3] = {32, 32, 1}; // test-value
+    result = context.command_queue_inst_.DispatchCommand(
+      kernel_addition, work_groups_count, work_group_size);
+    if (!result) {
+      break;
+    }
+
+    result = inOutRes.ReadData(context.command_queue_inst_, res);
+    if (!result) {
+      break;
+    }
+
+  } while (false);
+}
} // namespace nntrainer
extern opencl::Kernel kernel_sgemv;
extern opencl::Kernel kernel_sgemm;
extern opencl::Kernel kernel_dot;
+extern opencl::Kernel kernel_dot_fp16;
+extern opencl::Kernel kernel_addition;
/**
* @brief sgemv computation : Y = A*X + Y
unsigned int ldb, unsigned int ldc, RunLayerContext &context);
#endif
+/**
+ * @brief addition : accumulate input into res (res[i] += input[i]), used to sum the layer's input vectors
+ * @param[in] input float * for input
+ * @param[in] res float * for result/output
+ * @param[in] size number of elements in input vector
+ * @param[in] context RunLayerContext reference
+ */
+void addition_cl(const float *input, float *res, unsigned int size,
+ RunLayerContext &context);
+
} // namespace nntrainer
#endif /* __BLAS_KERNELS_H__ */