#include <fstream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>

#include <layer_context.h>
#include <nntrainer_log.h>
#include <tensor_wrap_specs.h>
}
#ifdef ENABLE_OPENCL
+
/**
 * @brief Global bit mask to check if kernel already initialized.
 * One bit per LayerKernel enumerator (power-of-two values); a set bit means
 * that kernel has already been built, so clCreateKernel can skip it.
 */
unsigned int RunLayerContext::kernelInitializedMask = 0;
+
+/**
+ * @brief create OpenCl kernel
+ * @param kernel_string implementation string
+ * @param layerKernel LayerKernel
+ * @return true if kernel creation is successful, false otherwise
+ */
bool RunLayerContext::clCreateKernel(std::string kernel_string,
- std::string kernel_name) {
- if (kernel_initialized) {
- ml_logi("Kernel already initialized: %s", kernel_name.c_str());
+ LayerKernel layerKernel,
+ opencl::Kernel &kernel_) {
+
+ // checking bitmask for already initialized kernel. eg: 010 & 000 -> 0 but 010
+ // & 110 -> 010
+ if (layerKernel == (kernelInitializedMask & layerKernel)) {
+ ml_logi("Kernel already initialized: %s",
+ getKernelName(layerKernel).c_str());
return true;
}
- ml_logi("Kernel initializing: %s", kernel_name.c_str());
+ ml_logi("Kernel initializing: %s", getKernelName(layerKernel).c_str());
bool result = false;
do {
opencl::Program program;
- result =
- program.CreateCLProgram(context_inst_.GetContext(),
- context_inst_.GetDeviceId(), kernel_string, "");
+
+ // reading binary
+ std::ifstream fs(opencl::Program::DEFAULT_KERNEL_PATH + "/" +
+ getKernelName(layerKernel) + "_kernel.bin",
+ std::ios::binary | std::ios::in);
+
+ if (fs) {
+ fs.seekg(0, std::ios::end);
+ size_t binary_size = fs.tellg();
+ fs.seekg(0, std::ios::beg);
+
+ unsigned char chunk[binary_size];
+ fs.read((char *)chunk, binary_size);
+
+ result = program.CreateCLProgramWithBinary(
+ context_inst_.GetContext(), context_inst_.GetDeviceId(), binary_size,
+ chunk,
+ opencl::Program::DEFAULT_KERNEL_PATH + "/" +
+ getKernelName(layerKernel) + "_kernel.bin",
+ "");
+ } else {
+ result =
+ program.CreateCLProgram(context_inst_.GetContext(),
+ context_inst_.GetDeviceId(), kernel_string, "");
+ }
+
if (!result) {
break;
}
- result = kernel_.CreateKernelFromProgram(program, kernel_name);
+ result =
+ kernel_.CreateKernelFromProgram(program, getKernelName(layerKernel));
if (!result) {
break;
}
- kernel_initialized = true;
+
+ // setting bitmask for current initialized kernel. eg: 010 | 000 -> 010
+ kernelInitializedMask = kernelInitializedMask | layerKernel;
} while (false);
return result;
}
+
+/**
+ * @brief Resolve kernel name from LayerKernel enum
+ * @param layerKernel enumerator of type LayerKernel
+ * @return string name of kernel
+ */
+std::string RunLayerContext::getKernelName(LayerKernel layerKernel) {
+ switch (layerKernel) {
+ case LayerKernel::KERNEL_NAME1:
+ return "kernel_name1";
+ case LayerKernel::KERNEL_NAME2:
+ return "kernel_name2";
+ default:
+ return "";
+ }
+}
#endif
} // namespace nntrainer
#include <weight.h>
#ifdef ENABLE_OPENCL
+#include <opencl_command_queue_manager.h>
#include <opencl_context_manager.h>
#include <opencl_kernel.h>
#include <opencl_program.h>
std::vector<Weight *> getWeights() { return weights; }
#ifdef ENABLE_OPENCL
  // getting static instances of commandqueue and context
  opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
  opencl::CommandQueueManager &command_queue_inst_ =
    opencl::CommandQueueManager::GetInstance();

  /**
   * @brief Enumerator for implemented OpenCL layer kernels. Used for resolving
   * kernelInitializedMask. All kernels should be added with value as enum index
   * to the power of 2 (e.g: 1, 2, 4, 8 ...). Should also be resolved in
   * getKernelName function.
   */
  enum LayerKernel {
    KERNEL_NAME1 = 1, /**< placeholder for kernel name */
    KERNEL_NAME2 = 2 /**< placeholder for kernel name */
  };

  /**
   * @brief Global bit mask to check if kernel already initialized.
   * NOTE(review): static, so the mask is shared by ALL RunLayerContext
   * instances — confirm this is intended when multiple contexts coexist.
   */
  static unsigned int kernelInitializedMask;
  /**
   * @brief create OpenCl kernel
   * @param kernel_string implementation string (kernel source fallback)
   * @param layerKernel LayerKernel enumerator identifying the kernel to build
   * @param kernel_ [out] reference of Kernel, populated on success
   * @return true if kernel creation is successful, false otherwise
   */
  bool clCreateKernel(std::string kernel_string, LayerKernel layerKernel,
                      opencl::Kernel &kernel_);
  /**
   * @brief destructor to release opencl context
   * NOTE(review): kernelInitializedMask is static (shared across instances),
   * so once any kernel has been built, EVERY RunLayerContext destruction
   * calls ReleaseContext() on the shared context — confirm ReleaseContext is
   * reference-counted or that only one instance exists at a time.
   */
  ~RunLayerContext() {
    if (kernelInitializedMask > 0) {
      context_inst_.ReleaseContext();
    }
  }
  // compute engine for this context; initialized to CPU, presumably
  // overridden when the layer is configured for OpenCL — TODO confirm setter
  ml::train::LayerComputeEngine compute_engine =
    ml::train::LayerComputeEngine::CPU;
-
#ifdef DEBUG
std::map<std::string, const void *>
tensor_map; /**< map of tensor name to tensor address */
* @return float Value of the loss
*/
float getWeightRegularizationLoss(unsigned int idx) const;

#ifdef ENABLE_OPENCL
  /**
   * @brief Resolve kernel name from LayerKernel enum
   * @param layerKernel enumerator of type LayerKernel
   * @return string name of kernel (empty string for an unknown enumerator)
   */
  std::string getKernelName(LayerKernel layerKernel);
#endif
};
} // namespace nntrainer