Register custom kernels as well as in-house kernels at cl_context initialization
Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
*/
std::unique_ptr<Layer>
createLayer(const std::string &type,
- const std::vector<std::string> &properties = {});
+ const std::vector<std::string> &properties = {},
+ const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU);
/**
* @brief General Layer Factory function to register Layer
* @brief Factory creator with constructor for layer
*/
std::unique_ptr<Layer> createLayer(const std::string &type,
- const std::vector<std::string> &properties) {
- return nntrainer::createLayerNode(type, properties);
+ const std::vector<std::string> &properties,
+ const LayerComputeEngine &compute_engine) {
+ return nntrainer::createLayerNode(type, properties, compute_engine);
}
std::unique_ptr<Optimizer>
return assigned_int_key;
}
+const ClContext::SharedPtrClKernel &
+ClContext::registerClKernel(std::string kernel_string,
+ std::string kernel_name) {
+ // check if created before
+ if (ocl_kernel_map.find(kernel_name) != ocl_kernel_map.end()) {
+ ml_logi("Kernel already registered and initialized: %s",
+ kernel_name.c_str());
+ return ocl_kernel_map[kernel_name];
+ }
+
+ // creating shared_ptr for kernel object
+ SharedPtrClKernel kernelPtr = std::make_shared<opencl::Kernel>();
+ if (!clCreateKernel(kernel_string, kernel_name, kernelPtr)) {
+ ml_loge("Failed to register kernel %s", kernel_name.c_str());
+ return nullptr;
+ }
+ // add to map
+ ocl_kernel_map.emplace(kernel_name, kernelPtr);
+ return ocl_kernel_map[kernel_name];
+}
+
+bool ClContext::clCreateKernel(std::string &kernel_string,
+ std::string &kernel_name,
+ const SharedPtrClKernel &kernel_ptr_) {
+
+ ml_logi("Kernel initializing: %s", kernel_name.c_str());
+
+ bool result = false;
+
+ do {
+ opencl::Program program;
+
+ // reading binary
+ std::ifstream fs(opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name +
+ "_kernel.bin",
+ std::ios::binary | std::ios::in);
+
+ if (fs.good()) {
+ fs.seekg(0, std::ios::end);
+ size_t binary_size = fs.tellg();
+ fs.seekg(0, std::ios::beg);
+
+ unsigned char chunk[binary_size];
+ fs.read((char *)chunk, binary_size);
+
+ result = program.CreateCLProgramWithBinary(
+ context_inst_.GetContext(), context_inst_.GetDeviceId(), binary_size,
+ chunk,
+ opencl::Program::DEFAULT_KERNEL_PATH + "/" + kernel_name +
+ "_kernel.bin",
+ "");
+ } else {
+ result =
+ program.CreateCLProgram(context_inst_.GetContext(),
+ context_inst_.GetDeviceId(), kernel_string, "");
+ }
+
+ if (!result) {
+ break;
+ }
+
+ result = kernel_ptr_->CreateKernelFromProgram(program, kernel_name);
+ if (!result) {
+ break;
+ }
+
+ } while (false);
+
+ return result;
+}
+
/**
* @copydoc const int ClContext::registerFactory
*/
#include <layer_devel.h>
#include <opencl_command_queue_manager.h>
+#include <opencl_context_manager.h>
+#include <opencl_kernel.h>
+#include <opencl_program.h>
#include <nntrainer_log.h>
template <typename T> using PtrType = std::unique_ptr<T>;
+ using SharedPtrClKernel = std::shared_ptr<opencl::Kernel>;
+
template <typename T>
using FactoryType = std::function<PtrType<T>(const PropsType &)>;
/** integer to string key */
using IntIndexType = std::unordered_map<int, std::string>;
+ /** string to kernel pointer map*/
+ using OclKernelMap = std::unordered_map<std::string, SharedPtrClKernel>;
+
/**
* This type contains tuple of
* 1) integer -> string index
template <typename... Ts> using FactoryMap = std::tuple<IndexType<Ts>...>;
+ // getting static instance of commandqueue and opencl context
+ opencl::CommandQueueManager &command_queue_inst_ =
+ opencl::CommandQueueManager::GetInstance();
+
+ opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
+
/**
* @brief Default constructor
*/
return entry->second(props);
}
+ /**
+ * @brief register or return already present OpenCl kernel pointer
+ * @param kernel_string kernel implementation string
+ * @param kernel_name kernel name
+ * @return reference of std::shared_ptr<opencl::Kernel>
+ */
+ const SharedPtrClKernel ®isterClKernel(std::string kernel_string,
+ std::string kernel_name);
+
/**
* @brief destructor to release opencl commandQueue
*/
~ClContext() {
if (cl_initialized) {
command_queue_inst_.ReleaseCommandQueue();
+ // getContext() is called by clCreateKernel
+ context_inst_.ReleaseContext();
}
};
template <typename Args, typename T> struct isSupportedHelper;
+ // map to store initialized opencl::Kernel
+ OclKernelMap ocl_kernel_map;
+
/**
* @brief supportHelper to check if given type is supported within cl context
*/
template <typename T>
struct isSupported : isSupportedHelper<T, decltype(factory_map)> {};
- // getting static instance of commandqueue
- opencl::CommandQueueManager &command_queue_inst_ =
- opencl::CommandQueueManager::GetInstance();
-
/**
* @brief Initialize opencl commandqueue and context
* @return true if OpenCL context and command queue creation is successful,
cl_initialized = result;
return cl_initialized;
};
+
+ /**
+ * @brief create OpenCl kernel
+ * @param kernel_string reference of implementation string
+ * @param kernel_name reference of kernel_name
+ * @param kernel_ptr_ reference of shared_ptr of Kernel
+ * @return true if successful, false otherwise
+ */
+ bool clCreateKernel(std::string &kernel_string, std::string &kernel_name,
+ const SharedPtrClKernel &kernel_ptr_);
};
/**
return true;
}
+bool CommandQueueManager::DispatchCommand(
+ const std::shared_ptr<Kernel> &kernel_ptr, const int (&work_groups_count)[3],
+ const int (&work_group_size)[3], cl_event *event) {
+
+ // work_dim of 2 has been hardcoded, might be modified later based on
+ // requirements
+
+ // setting the local_work_size referred to as the size of the
+ // work-group
+ const size_t local[2] = {static_cast<size_t>(work_group_size[0]),
+ static_cast<size_t>(work_group_size[1])};
+
+ // setting the global_work_size that describe the number of global work-items
+ const size_t global[2] = {static_cast<size_t>(work_groups_count[0]),
+ static_cast<size_t>(work_groups_count[1])};
+
+ cl_kernel kernel_ = kernel_ptr->GetKernel();
+
+ // returns NULL with error code if fails
+ const int error_code = clEnqueueNDRangeKernel(
+ command_queue_, kernel_, 2, nullptr, global, local, 0, nullptr, event);
+ if (error_code != CL_SUCCESS) {
+ ml_loge("Failed to clEnqueueNDRangeKernel. OpenCL error code: %d",
+ error_code);
+ return false;
+ }
+
+ return true;
+}
+
} // namespace nntrainer::opencl
#include "opencl_kernel.h"
#include "third_party/cl.h"
+#include <memory>
namespace nntrainer::opencl {
const int (&work_group_size)[3],
cl_event *event = nullptr);
+ /**
+ * @brief Overloaded function to initiate execution of the command queue.
+ *
+ * @param kernel_ptr reference of OpenCL kernel shared_ptr
+ * @param work_groups_count Total number of work items that will execute the
+ * kernel function
+ * @param work_group_size Number of work items that make up a work group
+ * @param event Object that identifies this command and can be used to query
+ * or wait for this command to complete
+ * @return true if command queue execution is successful or false otherwise
+ */
+ bool DispatchCommand(const std::shared_ptr<Kernel> &kernel_ptr,
+ const int (&work_groups_count)[3],
+ const int (&work_group_size)[3],
+ cl_event *event = nullptr);
+
/**
* @brief Get the OpenCL Command Queue object
*
bool result = false;
do {
- result = context.clCreateKernel(sscal_cl_kernel_,
- context.LayerKernel::SSCAL, kernel_sscal);
- if (!result) {
+ auto &cc = ClContext::Global();
+ ClContext::SharedPtrClKernel kernel_ptr =
+ cc.registerClKernel(sscal_cl_kernel_, "sscal_cl");
+
+ if (!kernel_ptr) {
break;
}
break;
}
- result = kernel_sscal.SetKernelArguments(0, &inputX, sizeof(cl_mem));
+ result = kernel_ptr->SetKernelArguments(0, &inputX, sizeof(cl_mem));
if (!result) {
break;
}
- result = kernel_sscal.SetKernelArguments(1, &alpha, sizeof(float));
+ result = kernel_ptr->SetKernelArguments(1, &alpha, sizeof(float));
if (!result) {
break;
}
const int work_group_size[3] = {32, 32, 1}; // test-value
result = context.command_queue_inst_.DispatchCommand(
- kernel_sscal, work_groups_count, work_group_size);
+ kernel_ptr, work_groups_count, work_group_size);
if (!result) {
break;
}
#ifndef __BLAS_KERNELS_H__
#define __BLAS_KERNELS_H__
+#include <cl_context.h>
#include <layer_context.h>
#include <opencl_buffer.h>
#include <opencl_kernel.h>