Added the enable-opencl build option, which defines the ENABLE_OPENCL macro internally.
Fixed clang issues.
**Self-evaluation**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
endif
endif
endif
+
+if get_option('enable-opencl')
+ message ('OpenCL build is enabled. Will work only if OpenCL supported GPU is available.')
+ extra_defines += '-DENABLE_OPENCL=1'
+endif
foreach extra_arg : warning_flags
if cc.has_argument (extra_arg)
option('enable-openmp', type: 'boolean', value: true)
option('enable-neon', type: 'boolean', value: false)
option('enable-avx', type: 'boolean', value: false)
+option('enable-opencl', type: 'boolean', value: false)
# ml-api dependency (to enable, install capi-inference from github.com/nnstreamer/api )
# To inter-operate with nnstreamer and ML-API packages, you need to enable this.
'optimizers',
'tensor',
'utils',
- 'graph',
- 'opencl'
+ 'graph'
]
+if get_option('enable-opencl')
+ nntrainer_elements += 'opencl'
+endif
+
foreach elem : nntrainer_elements
subdir(elem)
nntrainer_inc += include_directories(elem)
foreach h : opencl_headers
nntrainer_headers += meson.current_source_dir() / h
-endforeach
\ No newline at end of file
+endforeach
size_ = 0;
}
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
#ifndef GPU_CL_OPENCL_BUFFER_HPP_
#define GPU_CL_OPENCL_BUFFER_HPP_
-#include "third_party/cl.h"
#include "opencl_command_queue_manager.hpp"
#include "opencl_context_manager.hpp"
+#include "third_party/cl.h"
namespace nntrainer::internal {
class Buffer {
bool ReadData(CommandQueueManager &command_queue_inst, void *data);
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_BUFFER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_BUFFER_HPP_
return true;
}
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
#ifndef GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
#define GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
-#include "third_party/cl.h"
#include "opencl_kernel.hpp"
+#include "third_party/cl.h"
namespace nntrainer::internal {
class CommandQueueManager {
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
return true;
}
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
~ContextManager();
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
if (!kernel_ || error_code != CL_SUCCESS) {
kernel_ = nullptr;
ml_loge("Failed to create %s. OpenCL error code: %d", function_name.c_str(),
- error_code);
+ error_code);
return false;
}
clRetainProgram(prgm);
const cl_kernel GetKernel();
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_KERNEL_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_KERNEL_HPP_
#include <dlfcn.h>
-#include <string>
#include <nntrainer_log.h>
+#include <string>
namespace nntrainer::internal {
PFN_clReleaseCommandQueue clReleaseCommandQueue;
PFN_clReleaseMemObject clReleaseMemObject;
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_LOADER_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_LOADER_HPP_
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OP_INTERFACE_HPP_
\ No newline at end of file
+#endif // GPU_CL_OP_INTERFACE_HPP_
const cl_program &Program::GetProgram() { return program_; }
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
const cl_program &GetProgram();
};
} // namespace nntrainer::internal
-#endif // GPU_CL_OPENCL_PROGRAM_HPP_
\ No newline at end of file
+#endif // GPU_CL_OPENCL_PROGRAM_HPP_
}
} // namespace nntrainer
-#endif
\ No newline at end of file
+#endif
float beta, unsigned int dim1,
unsigned int dim2);
-} // namespace nntrainer::internal
\ No newline at end of file
+} // namespace nntrainer::internal
};
} // namespace nntrainer::internal
-#endif // GPU_CL_SGEMV_HPP_
\ No newline at end of file
+#endif // GPU_CL_SGEMV_HPP_
'optimized_v2_planner.cpp',
'optimized_v3_planner.cpp',
'task_executor.cpp',
- 'cl_operations/cl_sgemv.cpp'
]
tensor_headers = [
'weight.h',
'var_grad.h',
'tensor_wrap_specs.h',
- 'blas_interface.h',
+ 'blas_interface.h'
+]
+
+cl_sources = [
+ 'cl_operations/cl_sgemv.cpp'
+]
+
+cl_headers = [
'cl_interface.h'
]
tensor_sources += 'half_tensor.cpp'
endif
+if get_option('enable-opencl')
+ tensor_sources += cl_sources
+ tensor_headers += cl_headers
+endif
+
foreach s : tensor_sources
nntrainer_sources += meson.current_source_dir() / s
endforeach
#include <stdexcept>
#include <stdio.h>
+#ifdef ENABLE_OPENCL
#include "cl_interface.h"
+#endif
+
#include <lazy_tensor.h>
#include <tensor.h>
#include <util_func.h>
// sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
// ones.getData<float>(), 1, 0.0, rdata, 1);
if (GPUExecute) {
+#ifdef ENABLE_OPENCL
gpu_sgemv(data, (const float *)ones.getData<float>(), rdata, 1.0f, 0.0f,
batch, feat_len);
+#else
+ ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
} else {
sgemv(CblasRowMajor, CblasNoTrans, batch, feat_len, 1, data, feat_len,
ones.getData<float>(), 1, 0.0, rdata, 1);
// sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
// ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
if (GPUExecute) {
+#ifdef ENABLE_OPENCL
gpu_sgemv(data, (const float *)ones.getData<float>(),
ret.getData<float>(), 1.0f, beta, m, n);
+#else
+ ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
} else {
sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
// &data[k * dim.getFeatureLen()], feat_len,
// ones.getData<float>(), 1, beta, &rdata[k * feat_len], 1);
if (GPUExecute) {
+#ifdef ENABLE_OPENCL
gpu_sgemv(&data[k * dim.getFeatureLen()],
(const float *)ones.getData<float>(),
&rdata[k * feat_len], 1.0f, beta, t_axis, feat_len);
+#else
+ ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
} else {
sgemv(CblasRowMajor, CblasTrans, t_axis, feat_len, 1,
&data[k * dim.getFeatureLen()], feat_len,
// sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
// ones.getData<float>(), 1, beta, ret.getData<float>(), 1);
if (GPUExecute) {
+#ifdef ENABLE_OPENCL
gpu_sgemv(data, (const float *)ones.getData<float>(),
ret.getData<float>(), 1.0f, beta, m, n);
+#else
+ ml_loge("%s", "Error: enable-opencl is not enabled");
+#endif
} else {
sgemv(CblasRowMajor, CblasNoTrans, m, n, 1, data, n,
ones.getData<float>(), 1, beta, ret.getData<float>(), 1);