[OpenCL] enable-opencl flag added to build option
author Debadri Samaddar <s.debadri@samsung.com>
Wed, 7 Feb 2024 10:39:37 +0000 (16:09 +0530)
committer Jijoong Moon <jijoong.moon@samsung.com>
Thu, 7 Mar 2024 10:07:57 +0000 (19:07 +0900)
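Added the enable-opencl build option; when it is enabled, the build defines the ENABLE_OPENCL macro (-DENABLE_OPENCL=1) internally.
Fixed clang-format issues (include ordering, argument alignment, and missing newlines at end of file).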

**Self-evaluation**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
22 files changed:
meson.build
meson_options.txt
nntrainer/meson.build
nntrainer/opencl/meson.build
nntrainer/opencl/opencl_buffer.cpp
nntrainer/opencl/opencl_buffer.hpp
nntrainer/opencl/opencl_command_queue_manager.cpp
nntrainer/opencl/opencl_command_queue_manager.hpp
nntrainer/opencl/opencl_context_manager.cpp
nntrainer/opencl/opencl_context_manager.hpp
nntrainer/opencl/opencl_kernel.cpp
nntrainer/opencl/opencl_kernel.hpp
nntrainer/opencl/opencl_loader.cpp
nntrainer/opencl/opencl_loader.hpp
nntrainer/opencl/opencl_op_interface.hpp
nntrainer/opencl/opencl_program.cpp
nntrainer/opencl/opencl_program.hpp
nntrainer/tensor/cl_interface.h
nntrainer/tensor/cl_operations/cl_sgemv.cpp
nntrainer/tensor/cl_operations/cl_sgemv.hpp
nntrainer/tensor/meson.build
nntrainer/tensor/tensor.cpp

index a16361e4a8ce8e73c9696c68587d197d77798e65..bdbd2a50b429538dd58f85a37558ca3591df6f00 100644 (file)
@@ -93,6 +93,11 @@ if get_option('enable-fp16')
      endif
    endif  
 endif
+
+if get_option('enable-opencl')
+  message ('OpenCL build is enabled. Will work only if OpenCL supported GPU is available.')
+  extra_defines += '-DENABLE_OPENCL=1'
+endif
     
 foreach extra_arg : warning_flags
   if cc.has_argument (extra_arg)
index 293c9690b147ecea32f6a42c41a9382bcb45d6d4..80681c7fdede18cb4f83f6dda92cbdb226355a88 100644 (file)
@@ -40,6 +40,7 @@ option('enable-cublas', type: 'boolean', value: false)
 option('enable-openmp', type: 'boolean', value: true)
 option('enable-neon', type: 'boolean', value: false)
 option('enable-avx', type: 'boolean', value: false)
+option('enable-opencl', type: 'boolean', value: false)
 
 # ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api )
 # To inter-operate with nnstreamer and ML-API packages, you need to enable this.
index 41d3d0142a01cad1f599290326c76214b73bfd58..c852b7f890127e41b1f4df758eeb92a8652576d2 100644 (file)
@@ -42,10 +42,13 @@ nntrainer_elements = [
   'optimizers',
   'tensor',
   'utils',
-  'graph',
-  'opencl'
+  'graph'
 ]
 
+if get_option('enable-opencl')
+  nntrainer_elements += 'opencl'
+endif
+
 foreach elem : nntrainer_elements
   subdir(elem)
   nntrainer_inc += include_directories(elem)
index 5a4c30ec04475e3db3aeea4251b45592ca1c5a35..fb3a57e01adeed8ad3f2dfb5ebf7c12684104dee 100644 (file)
@@ -19,4 +19,4 @@ endforeach
 
 foreach h : opencl_headers
   nntrainer_headers += meson.current_source_dir() / h
-endforeach
\ No newline at end of file
+endforeach
index 1eca07a55871b07d701ca5c17f0c28d6818e066b..a26b351e7918ed4b9418d1611e80bfc90a72c22f 100644 (file)
@@ -73,4 +73,4 @@ void Buffer::Release() {
   size_ = 0;
 }
 
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index 7bc1de9fd74149d451383f43b0dc4459c54bb458..868854e75742398bb5d6d53fe62c7c298619d0c9 100644 (file)
@@ -14,9 +14,9 @@
 #ifndef GPU_CL_OPENCL_BUFFER_HPP_
 #define GPU_CL_OPENCL_BUFFER_HPP_
 
-#include "third_party/cl.h"
 #include "opencl_command_queue_manager.hpp"
 #include "opencl_context_manager.hpp"
+#include "third_party/cl.h"
 
 namespace nntrainer::internal {
 class Buffer {
@@ -42,4 +42,4 @@ public:
   bool ReadData(CommandQueueManager &command_queue_inst, void *data);
 };
 } // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_BUFFER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_BUFFER_HPP_
index a745adbb84b7f7b42bc9f55fc3107f6023b9aa31..60dbf01586793d1448ab3069cfb22d7a52ff9abc 100644 (file)
@@ -116,4 +116,4 @@ bool CommandQueueManager::DispatchCommand(Kernel kernel,
   return true;
 }
 
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index 26615288acea880b0f2658e2217571eaf768d686..36d07c33bf30ca42f2dda162b82ffb507d820bf1 100644 (file)
@@ -14,8 +14,8 @@
 #ifndef GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
 #define GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
 
-#include "third_party/cl.h"
 #include "opencl_kernel.hpp"
+#include "third_party/cl.h"
 
 namespace nntrainer::internal {
 class CommandQueueManager {
@@ -45,4 +45,4 @@ public:
 };
 } // namespace nntrainer::internal
 
-#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
index e362a95f4f91cc76f122b93ed2b4a4c9eb1ace4f..0b9c59d53953362d3943972459292df925cea79f 100644 (file)
@@ -134,4 +134,4 @@ bool ContextManager::CreateCLContext() {
 
   return true;
 }
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index ff1fcaf1bcde46d028d7313bcc5e9cb4292fc99d..3cc6584214647c3d4b7364b26cc34111b314f57d 100644 (file)
@@ -42,4 +42,4 @@ public:
   ~ContextManager();
 };
 } // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
index 3d228b3f23399c9eeca7e8c06190e01716bf0cfe..e4d7047902fc3d694e9c459b6e2a671361c2f104 100644 (file)
@@ -27,7 +27,7 @@ bool Kernel::CreateKernelFromProgram(Program program,
   if (!kernel_ || error_code != CL_SUCCESS) {
     kernel_ = nullptr;
     ml_loge("Failed to create %s. OpenCL error code: %d", function_name.c_str(),
-        error_code);
+            error_code);
     return false;
   }
   clRetainProgram(prgm);
index f4e05505736c4c501f8a58f8faeb13470b63a498..a26c50c2be805c2c1a008ae44c41c38ac0c4f7ad 100644 (file)
@@ -32,4 +32,4 @@ public:
   const cl_kernel GetKernel();
 };
 } // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_KERNEL_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_KERNEL_HPP_
index 23dd729a71e6a8261589632d17e913476df0e295..80fcd956b55d6e1a669fc69d7777585dc242db89 100644 (file)
@@ -15,8 +15,8 @@
 
 #include <dlfcn.h>
 
-#include <string>
 #include <nntrainer_log.h>
+#include <string>
 
 namespace nntrainer::internal {
 
@@ -92,4 +92,4 @@ PFN_clRetainCommandQueue clRetainCommandQueue;
 PFN_clReleaseCommandQueue clReleaseCommandQueue;
 PFN_clReleaseMemObject clReleaseMemObject;
 
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index 30e2cb933586e16b28a8d6b00a18f71531e6be33..65376e0e16eaf45484fabefb98264b2f5064e6fd 100644 (file)
@@ -136,4 +136,4 @@ extern PFN_clReleaseMemObject clReleaseMemObject;
 
 } // namespace nntrainer::internal
 
-#endif // GPU_CL_OPENCL_LOADER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_LOADER_HPP_
index 7d783822d64f1a05cd6423ce0d66e5757811c8cb..72c1cc2638b2fe50ef7ea864a35c1639a47cff8c 100644 (file)
@@ -38,4 +38,4 @@ protected:
 };
 } // namespace nntrainer::internal
 
-#endif // GPU_CL_OP_INTERFACE_HPP_
\ No newline at end of file
+#endif // GPU_CL_OP_INTERFACE_HPP_
index 8f41e199521939a5f06c7a96594df5bcb8450cc4..dcc1803e8c284750d3dfaad90fcd6be86be1d502 100644 (file)
@@ -75,4 +75,4 @@ bool Program::CreateCLProgram(const cl_context &context,
 
 const cl_program &Program::GetProgram() { return program_; }
 
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index 4e07a7550fa90de8b7eed8c4302bab9f9d45717c..2e02f5d5c4be399dbae48969964310b0f8149570 100644 (file)
@@ -34,4 +34,4 @@ public:
   const cl_program &GetProgram();
 };
 } // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_PROGRAM_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_PROGRAM_HPP_
index 5386d1559faeb5272d2c9e3dc2fa3c7d5a0859f9..0b93bd7c8586b0e03b6d52eaaaeffae7ba7a5cb2 100644 (file)
@@ -35,4 +35,4 @@ void gpu_sgemv(const float *A, const float *X, float *Y, float alpha,
 }
 } // namespace nntrainer
 
-#endif
\ No newline at end of file
+#endif
index 2deb4df77ad77e2920a53554c6f05fee96228c66..725007ed8855729fc46885a818072efd177ff42a 100644 (file)
@@ -117,4 +117,4 @@ template float *GpuCLSgemv::CLSgemv<float>(const float *matAdata,
                                            float beta, unsigned int dim1,
                                            unsigned int dim2);
 
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
index 5b46b0720e862f513ef78e0ca57151eeff57485f..79bcfd528a39df7944ea096f0e3d2c92cfe60946 100644 (file)
@@ -35,4 +35,4 @@ public:
 };
 } // namespace nntrainer::internal
 
-#endif // GPU_CL_SGEMV_HPP_
\ No newline at end of file
+#endif // GPU_CL_SGEMV_HPP_
index 1f3a22387c16be53e2eb372186490ce772125b1d..025bbdd717406e2cae420e13e780b62938b37d96 100644 (file)
@@ -20,7 +20,6 @@ tensor_sources = [
   'optimized_v2_planner.cpp',
   'optimized_v3_planner.cpp',
   'task_executor.cpp',
-  'cl_operations/cl_sgemv.cpp'
 ]
 
 tensor_headers = [
@@ -32,7 +31,14 @@ tensor_headers = [
   'weight.h',
   'var_grad.h',    
   'tensor_wrap_specs.h',
-  'blas_interface.h',
+  'blas_interface.h'
+]
+
+cl_sources = [
+  'cl_operations/cl_sgemv.cpp'
+]
+
+cl_headers = [
   'cl_interface.h'
 ]
 
@@ -56,6 +62,11 @@ if get_option('enable-fp16')
   tensor_sources += 'half_tensor.cpp'
 endif
 
+if get_option('enable-opencl')
+  tensor_sources += cl_sources
+  tensor_headers += cl_headers
+endif
+
 foreach s : tensor_sources
   nntrainer_sources += meson.current_source_dir() / s
 endforeach
index 070838809244f206434e255f24798783dd38b3a1..1061339f8ea71d573d75c8c652f65648381f8033 100644 (file)
 #include <stdexcept>
 #include <stdio.h>
 
+#ifdef ENABLE_OPENCL
 #include "cl_interface.h"
+#endif
+
 #include <lazy_tensor.h>
 #include <tensor.h>
 #include <util_func.h>
@@ -1779,8 +1782,12 @@ Tensor Tensor::sum_by_batch(bool GPUExecute) const {
     // sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
     //       ones.getData<float>(), 1, 0.0, rdata, 1);
     if (GPUExecute) {
+#ifdef ENABLE_OPENCL
       gpu_sgemv(data, (const float *)ones.getData<float>(), rdata, 1.0f, 0.0f,
                 batch, feat_len);
+#else
+      ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
     } else {
       sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
             ones.getData<float>(), 1, 0.0, rdata, 1);
@@ -1849,8 +1856,12 @@ Tensor &Tensor::sum(unsigned int axis, Tensor &ret, float alpha, float beta,
         // sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
         //       ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
         if (GPUExecute) {
+#ifdef ENABLE_OPENCL
           gpu_sgemv(data, (const float *)ones.getData<float>(),
                     ret.getData<float>(), 1.0f, beta, m, n);
+#else
+          ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
         } else {
           sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
                 ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
@@ -1866,9 +1877,13 @@ Tensor &Tensor::sum(unsigned int axis, Tensor &ret, float alpha, float beta,
           //       &data[k * dim.getFeatureLen()], feat_len,
           //       ones.getData<float>(), 1, beta, &rdata[k * feat_len], 1);
           if (GPUExecute) {
+#ifdef ENABLE_OPENCL
             gpu_sgemv(&data[k * dim.getFeatureLen()],
                       (const float *)ones.getData<float>(),
                       &rdata[k * feat_len], 1.0f, beta, t_axis, feat_len);
+#else
+            ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
           } else {
             sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1,
                   &data[k * dim.getFeatureLen()], feat_len,
@@ -1942,8 +1957,12 @@ Tensor &Tensor::sum(unsigned int axis, Tensor &ret, float alpha, float beta,
           // sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
           //       ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
           if (GPUExecute) {
+#ifdef ENABLE_OPENCL
             gpu_sgemv(data, (const float *)ones.getData<float>(),
                       ret.getData<float>(), 1.0f, beta, m, n);
+#else
+            ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
           } else {
             sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
                   ones.getData<float>(), 1, beta, ret.getData<float>(), 1);