[GPU/OpenCL] Create kernel utility with binaries
author Debadri Samaddar <s.debadri@samsung.com>
Thu, 4 Apr 2024 09:21:10 +0000 (14:51 +0530)
committer Jijoong Moon <jijoong.moon@samsung.com>
Thu, 18 Apr 2024 07:56:33 +0000 (16:56 +0900)
Added a feature for reading kernel binaries.
Added management of already created kernels.
Added a static flag and bitmask to check for existing kernels.

Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
nntrainer/layers/layer_context.cpp
nntrainer/layers/layer_context.h

index 04bc576c38f33516e1f4644cdf5eca3a91059e8c..4ed5c90b6ed74e88e7fb77e7553178a5a98e19ed 100644 (file)
@@ -16,6 +16,7 @@
 #include <memory>
 #include <tensor_wrap_specs.h>
 
+#include <fstream>
 #include <iterator>
 #include <layer_context.h>
 #include <nntrainer_log.h>
@@ -567,35 +568,94 @@ bool RunLayerContext::validate(bool skip_input, bool skip_label) {
 }
 
 #ifdef ENABLE_OPENCL
+
+/**
+ * @brief Global bit mask to check if kernel already initialized.
+ */
+unsigned int RunLayerContext::kernelInitializedMask = 0;
+
+/**
+ * @brief create OpenCl kernel
+ * @param kernel_string implementation string
+ * @param layerKernel LayerKernel
+ * @return true if kernel creation is successful, false otherwise
+ */
 bool RunLayerContext::clCreateKernel(std::string kernel_string,
-                                     std::string kernel_name) {
-  if (kernel_initialized) {
-    ml_logi("Kernel already initialized: %s", kernel_name.c_str());
+                                     LayerKernel layerKernel,
+                                     opencl::Kernel &kernel_) {
+
+  // checking bitmask for already initialized kernel. eg: 010 & 000 -> 0 but 010
+  // & 110 -> 010
+  if (layerKernel == (kernelInitializedMask & layerKernel)) {
+    ml_logi("Kernel already initialized: %s",
+            getKernelName(layerKernel).c_str());
     return true;
   }
 
-  ml_logi("Kernel initializing: %s", kernel_name.c_str());
+  ml_logi("Kernel initializing: %s", getKernelName(layerKernel).c_str());
 
   bool result = false;
 
   do {
     opencl::Program program;
-    result =
-      program.CreateCLProgram(context_inst_.GetContext(),
-                              context_inst_.GetDeviceId(), kernel_string, "");
+
+    // reading binary
+    std::ifstream fs(opencl::Program::DEFAULT_KERNEL_PATH + "/" +
+                       getKernelName(layerKernel) + "_kernel.bin",
+                     std::ios::binary | std::ios::in);
+
+    if (fs) {
+      fs.seekg(0, std::ios::end);
+      size_t binary_size = fs.tellg();
+      fs.seekg(0, std::ios::beg);
+
+      unsigned char chunk[binary_size];
+      fs.read((char *)chunk, binary_size);
+
+      result = program.CreateCLProgramWithBinary(
+        context_inst_.GetContext(), context_inst_.GetDeviceId(), binary_size,
+        chunk,
+        opencl::Program::DEFAULT_KERNEL_PATH + "/" +
+          getKernelName(layerKernel) + "_kernel.bin",
+        "");
+    } else {
+      result =
+        program.CreateCLProgram(context_inst_.GetContext(),
+                                context_inst_.GetDeviceId(), kernel_string, "");
+    }
+
     if (!result) {
       break;
     }
 
-    result = kernel_.CreateKernelFromProgram(program, kernel_name);
+    result =
+      kernel_.CreateKernelFromProgram(program, getKernelName(layerKernel));
     if (!result) {
       break;
     }
-    kernel_initialized = true;
+
+    // setting bitmask for current initialized kernel. eg: 010 | 000 -> 010
+    kernelInitializedMask = kernelInitializedMask | layerKernel;
   } while (false);
 
   return result;
 }
+
+/**
+ * @brief Resolve kernel name from LayerKernel enum
+ * @param layerKernel enumerator of type LayerKernel
+ * @return string name of kernel
+ */
+std::string RunLayerContext::getKernelName(LayerKernel layerKernel) {
+  switch (layerKernel) {
+  case LayerKernel::KERNEL_NAME1:
+    return "kernel_name1";
+  case LayerKernel::KERNEL_NAME2:
+    return "kernel_name2";
+  default:
+    return "";
+  }
+}
 #endif
 
 } // namespace nntrainer
index 3278cf0f24438bce443dedb9dd88e65e8257c9b8..ea38ecc5f73abb5706d34519dd0d9526305b42f7 100644 (file)
@@ -25,6 +25,7 @@
 #include <weight.h>
 
 #ifdef ENABLE_OPENCL
+#include <opencl_command_queue_manager.h>
 #include <opencl_context_manager.h>
 #include <opencl_kernel.h>
 #include <opencl_program.h>
@@ -808,24 +809,42 @@ public:
   std::vector<Weight *> getWeights() { return weights; }
 
 #ifdef ENABLE_OPENCL
-
-  // getting static instances of commandqueue, context and kernel
+  // getting static instances of commandqueue and context
   opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
-  opencl::Kernel kernel_;
+  opencl::CommandQueueManager &command_queue_inst_ =
+    opencl::CommandQueueManager::GetInstance();
+
+  /**
+   * @brief Enumerator for implemented OpenCL layer kernels. Used for resolving
+   * kernelInitializedMask. Each kernel must be given a distinct power-of-two
+   * value (e.g. 1, 2, 4, 8 ...) so it maps to one bit of the mask. Each must
+   * also be resolved in the getKernelName function.
+   */
+  enum LayerKernel {
+    KERNEL_NAME1 = 1, /**< placeholder for kernel name */
+    KERNEL_NAME2 = 2  /**< placeholder for kernel name */
+  };
+
+  /**
+   * @brief Global bit mask to check if kernel already initialized.
+   */
+  static unsigned int kernelInitializedMask;
 
   /**
    * @brief create OpenCl kernel
-   * @param kernel implementation string
-   * @param kernel name
+   * @param kernel_string implementation string
+   * @param layerKernel LayerKernel
+   * @param kernel_ reference of Kernel
    * @return true if kernel creation is successful, false otherwise
    */
-  bool clCreateKernel(std::string kernel_string, std::string kernel_name);
+  bool clCreateKernel(std::string kernel_string, LayerKernel layerKernel,
+                      opencl::Kernel &kernel_);
 
   /**
    * @brief destructor to release opencl context
    */
   ~RunLayerContext() {
-    if (kernel_initialized) {
+    if (kernelInitializedMask > 0) {
       context_inst_.ReleaseContext();
     }
   }
@@ -859,9 +878,6 @@ private:
   ml::train::LayerComputeEngine compute_engine =
     ml::train::LayerComputeEngine::CPU;
 
-  // flag to check whether opencl kernel is initialized or not
-  bool kernel_initialized = false;
-
 #ifdef DEBUG
   std::map<std::string, const void *>
     tensor_map; /**< map of tensor name to tensor address */
@@ -874,6 +890,15 @@ private:
    * @return float Value of the loss
    */
   float getWeightRegularizationLoss(unsigned int idx) const;
+
+#ifdef ENABLE_OPENCL
+  /**
+   * @brief Resolve kernel name from LayerKernel enum
+   * @param layerKernel enumerator of type LayerKernel
+   * @return string name of kernel
+   */
+  std::string getKernelName(LayerKernel layerKernel);
+#endif
 };
 
 } // namespace nntrainer