+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2024 Debadri Samaddar <s.debadri@samsung.com>
- *
- * @file cl_interface.h
- * @date 06 Feb 2024
- * @see https://github.com/nnstreamer/nntrainer
- * @author Debadri Samaddar <s.debadri@samsung.com>
- * @bug No known bugs except for NYI items
- * @brief Interface for GPU tensor operations
- *
- * @note This file is experimental and is kept for testing purpose
- *
- */
-
-#ifndef __CL_INTERFACE_H_
-#define __CL_INTERFACE_H_
-
-#include "cl_sgemv.h"
-
-namespace nntrainer {
-
-/**
- * @brief     sgemv computation on GPU : Y = alpha*A*X + beta*Y
- * @param[in] A float * for Matrix A
- * @param[in] X float * for Vector X
- * @param[in, out] Y float * for Vector Y; holds the input Y and is
- *                 overwritten with the result read back from the device
- * @param[in] alpha float number
- * @param[in] beta float number
- * @param[in] rows number of A's row
- * @param[in] cols number of A's columns
- *
- * @note Declared inline because it is defined in a header: without it, every
- *       translation unit that includes this file emits its own external
- *       definition and the link fails with a multiple-definition error.
- */
-inline void gpu_sgemv(const float *A, const float *X, float *Y, float alpha,
-                      float beta, unsigned int rows, unsigned int cols) {
-  // One operation object is created on first use and reused by later calls.
-  static internal::GpuCLSgemv cl_gpu_sgemv;
-  cl_gpu_sgemv.cLSgemv(A, X, Y, alpha, beta, rows, cols);
-}
-} // namespace nntrainer
-
-#endif
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2024 Debadri Samaddar <s.debadri@samsung.com>
- *
- * @file cl_sgemv.cpp
- * @date 06 Feb 2024
- * @see https://github.com/nnstreamer/nntrainer
- * @author Debadri Samaddar <s.debadri@samsung.com>
- * @bug No known bugs except for NYI items
- * @brief Experimental SGEMV implementation using OpenCL
- *
- * @note This file is experimental and is kept for testing purpose
- *
- */
-
-#include "cl_sgemv.h"
-#include <opencl_buffer.h>
-
-#include <nntrainer_log.h>
-
-namespace nntrainer::internal {
-
-/**
- * @brief Runs the "sgemv" OpenCL kernel: Y = alpha * A * X + beta * Y
- *
- * The kernel source held in sgemv_kernel_ indexes A as a row-major
- * dim1 x dim2 matrix (A[row * dim2 + j]), X with j in [0, dim2) and Y with
- * one work-item per row in [0, dim1). The device buffers below are sized to
- * match that indexing.
- *
- * @tparam T element type of the host arrays (only float is instantiated)
- * @param[in] matAdata host pointer to A, dim1 * dim2 elements
- * @param[in] vecXdata host pointer to X, dim2 elements
- * @param[in, out] vecYdata host pointer to Y, dim1 elements; receives the
- *                 result when every step succeeds
- * @param[in] alpha scale applied to A * X
- * @param[in] beta scale applied to the incoming Y
- * @param[in] dim1 number of rows of A (kernel argument M)
- * @param[in] dim2 number of columns of A (kernel argument N)
- * @return T* always vecYdata; a failed step stops the sequence early but is
- *         not reported to the caller
- */
-template <typename T>
-T *GpuCLSgemv::cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
-                       T alpha, T beta, unsigned int dim1, unsigned int dim2) {
-
-  ml_logi("GpuCLSgemv::CLSgemv");
-
-  bool result = false;
-
-  // Single-pass loop: any failing step breaks out to the common return.
-  do {
-    result = Init(sgemv_kernel_, "sgemv");
-    if (!result) {
-      break;
-    }
-
-    // Byte sizes of the three operands. A holds dim1 * dim2 elements, X holds
-    // dim2 (one per column) and Y holds dim1 (one per row).
-    const size_t mat_a_size = sizeof(T) * dim1 * dim2;
-    const size_t vec_x_size = sizeof(T) * dim2;
-    const size_t vec_y_size = sizeof(T) * dim1;
-
-    // NOTE(review): the third constructor argument is `true` for all three
-    // buffers, including the one the kernel writes to. Confirm against
-    // opencl::Buffer that this flag does not make inOutY read-only.
-    opencl::Buffer inputA(context_inst_, mat_a_size, true, nullptr);
-
-    opencl::Buffer inputX(context_inst_, vec_x_size, true, nullptr);
-
-    opencl::Buffer inOutY(context_inst_, vec_y_size, true, nullptr);
-
-    result = inputA.WriteData(command_queue_inst_, matAdata);
-    if (!result) {
-      break;
-    }
-
-    result = inputX.WriteData(command_queue_inst_, vecXdata);
-    if (!result) {
-      break;
-    }
-
-    // Y is uploaded as well because the kernel scales the existing values by
-    // beta before accumulating.
-    result = inOutY.WriteData(command_queue_inst_, vecYdata);
-    if (!result) {
-      break;
-    }
-
-    // Argument order follows the kernel signature: A, X, Y, alpha, beta, M, N.
-    result = kernel_.SetKernelArguments(0, &inputA, sizeof(cl_mem));
-    if (!result) {
-      break;
-    }
-
-    result = kernel_.SetKernelArguments(1, &inputX, sizeof(cl_mem));
-    if (!result) {
-      break;
-    }
-
-    result = kernel_.SetKernelArguments(2, &inOutY, sizeof(cl_mem));
-    if (!result) {
-      break;
-    }
-
-    result = kernel_.SetKernelArguments(3, &alpha, sizeof(T));
-    if (!result) {
-      break;
-    }
-
-    result = kernel_.SetKernelArguments(4, &beta, sizeof(T));
-    if (!result) {
-      break;
-    }
-
-    // The kernel declares M and N as unsigned int, so size them from the
-    // variables themselves rather than from a signed type.
-    result = kernel_.SetKernelArguments(5, &dim1, sizeof(dim1));
-    if (!result) {
-      break;
-    }
-
-    result = kernel_.SetKernelArguments(6, &dim2, sizeof(dim2));
-    if (!result) {
-      break;
-    }
-
-    // One work-item per row of A.
-    const int work_groups_count[3] = {static_cast<int>(dim1), 1, 1};
-    // NOTE(review): test value carried over unchanged. A 32x32x1 group size
-    // next to a {dim1, 1, 1} range looks inconsistent; confirm how
-    // DispatchCommand interprets these two arrays.
-    const int work_group_size[3] = {32, 32, 1}; // test-value
-
-    result = command_queue_inst_.DispatchCommand(kernel_, work_groups_count,
-                                                 work_group_size);
-    if (!result) {
-      break;
-    }
-
-    result = inOutY.ReadData(command_queue_inst_, vecYdata);
-    if (!result) {
-      break;
-    }
-
-  } while (false);
-
-  // NOTE(review): `result` is not surfaced; on failure vecYdata is returned
-  // with whatever contents it had before the failing step.
-  return vecYdata;
-}
-
-/**
- * @brief Explicit instantiation of cLSgemv for float, so the template
- *        defined in this file is emitted for that element type
- *
- */
-template float *GpuCLSgemv::cLSgemv<float>(const float *, const float *,
-                                           float *, float, float,
-                                           unsigned int, unsigned int);
-
-} // namespace nntrainer::internal
+++ /dev/null
-// SPDX-License-Identifier: Apache-2.0
-/**
- * Copyright (C) 2024 Debadri Samaddar <s.debadri@samsung.com>
- *
- * @file cl_sgemv.h
- * @date 06 Feb 2024
- * @see https://github.com/nnstreamer/nntrainer
- * @author Debadri Samaddar <s.debadri@samsung.com>
- * @bug No known bugs except for NYI items
- * @brief Experimental SGEMV implementation using OpenCL
- *
- * @note This file is experimental and is kept for testing purpose
- *
- */
-
-#ifndef __CL_SGEMV_H__
-#define __CL_SGEMV_H__
-
-#include <opencl_op_interface.h>
-
-namespace nntrainer::internal {
-/**
- * @class   GpuCLSgemv class
- * @brief   Naive SGEMV on OpenCL: carries the kernel source and declares the
- *          host-side entry point. Used for testing/experimentation.
- */
-class GpuCLSgemv : public nntrainer::opencl::GpuCLOpInterface {
-private:
-  /**
-   * OpenCL C source of the "sgemv" kernel. Each work-item takes one row:
-   * it scales Y[row] by beta, then adds alpha * A[row * N + j] * X[j] for
-   * every j in [0, N). The M argument is accepted but not read by the kernel.
-   */
-  std::string sgemv_kernel_{
-    R"(__kernel void sgemv(const __global float* A, const __global float* X,
- __global float* Y, float alpha, float beta, unsigned int M, unsigned int N) {
- const int row = get_global_id(0);
- Y[row] = Y[row] * beta;
- for (unsigned int j = 0; j < N; j++){
- Y[row] += alpha * A[row * N + j] * X[j];
- }
- })"};
-
-public:
-  /**
-   * @brief Sets up device buffers and kernel arguments for SGEMV, runs the
-   *        kernel and reads the result back
-   *
-   * @tparam T element type of the host arrays
-   * @param matAdata host pointer to matrix A
-   * @param vecXdata host pointer to vector X
-   * @param vecYdata host pointer to vector Y (input and output)
-   * @param alpha scale applied to A * X
-   * @param beta scale applied to the incoming Y
-   * @param dim1 passed to the kernel as M
-   * @param dim2 passed to the kernel as N
-   * @return T* the vecYdata pointer
-   */
-  template <typename T>
-  T *cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata, T alpha, T beta,
-             unsigned int dim1, unsigned int dim2);
-};
-} // namespace nntrainer::internal
-
-#endif // __CL_SGEMV_H__