Runtime: complete the api clGetKernelWorkGroupInfo.
author: Zhigang Gong <zhigang.gong@intel.com>
Mon, 11 Nov 2013 08:20:26 +0000 (16:20 +0800)
committer: Zhigang Gong <zhigang.gong@intel.com>
Mon, 11 Nov 2013 08:40:10 +0000 (16:40 +0800)
Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
src/cl_api.c
src/cl_command_queue_gen7.c
src/cl_device_id.c
src/cl_device_id.h
src/cl_gt_device.h
src/cl_kernel.h

index d04ff00..d15354b 100644 (file)
@@ -1108,7 +1108,8 @@ clGetKernelWorkGroupInfo(cl_kernel                   kernel,
                          void *                      param_value,
                          size_t *                    param_value_size_ret)
 {
-  return cl_get_kernel_workgroup_info(device,
+  return cl_get_kernel_workgroup_info(kernel,
+                                      device,
                                       param_name,
                                       param_value_size,
                                       param_value,
index 0788975..9faaafa 100644 (file)
@@ -213,6 +213,7 @@ cl_curbe_fill(cl_kernel ker,
   /* Handle the various offsets to SLM */
   const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
   int32_t arg, slm_offset = gbe_kernel_get_slm_size(ker->opaque);
+  ker->local_mem_sz = 0;
   for (arg = 0; arg < arg_n; ++arg) {
     const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
     if (type != GBE_ARG_LOCAL_PTR)
@@ -225,8 +226,8 @@ cl_curbe_fill(cl_kernel ker,
     uint32_t *slmptr = (uint32_t *) (ker->curbe + offset);
     *slmptr = slm_offset;
     slm_offset += ker->args[arg].local_sz;
+    ker->local_mem_sz += ker->args[arg].local_sz;
   }
-
   return slm_offset;
 }
 
index 108cfdf..acc91e9 100644 (file)
@@ -273,8 +273,10 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
              sizeof(((cl_device_id)NULL)->FIELD));                  \
         return CL_SUCCESS;
 
+#include "cl_kernel.h"
 LOCAL cl_int
-cl_get_kernel_workgroup_info(cl_device_id device,
+cl_get_kernel_workgroup_info(cl_kernel kernel,
+                             cl_device_id device,
                              cl_kernel_work_group_info param_name,
                              size_t param_value_size,
                              void* param_value,
@@ -289,6 +291,14 @@ cl_get_kernel_workgroup_info(cl_device_id device,
   switch (param_name) {
     DECL_FIELD(WORK_GROUP_SIZE, wg_sz)
     DECL_FIELD(COMPILE_WORK_GROUP_SIZE, compile_wg_sz)
+    DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, preferred_wg_sz_mul)
+    case CL_KERNEL_LOCAL_MEM_SIZE:
+      if (param_value_size < sizeof(cl_ulong))
+        return CL_INVALID_VALUE;
+      if (param_value_size_ret != NULL)
+        *param_value_size_ret = sizeof(cl_ulong);
+      *(cl_ulong*)param_value = gbe_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz;
+      return CL_SUCCESS;
     default: return CL_INVALID_VALUE;
   };
 }
index 1beff92..56ffd33 100644 (file)
@@ -96,6 +96,7 @@ struct _cl_device_id {
   /* Kernel specific info that we're assigning statically */
   size_t wg_sz;
   size_t compile_wg_sz[3];
+  size_t preferred_wg_sz_mul;
 };
 
 /* Get a device from the given platform */
@@ -115,11 +116,12 @@ extern cl_int cl_get_device_info(cl_device_id     device,
                                  void *           param_value,
                                  size_t *         param_value_size_ret);
 
-extern cl_int cl_get_kernel_workgroup_info(cl_device_id     device,
-                                 cl_kernel_work_group_info   param_name,
-                                 size_t           param_value_size,
-                                 void *           param_value,
-                                 size_t *         param_value_size_ret);
+extern cl_int cl_get_kernel_workgroup_info(cl_kernel kernel,
+                                           cl_device_id     device,
+                                           cl_kernel_work_group_info   param_name,
+                                           size_t           param_value_size,
+                                           void *           param_value,
+                                           size_t *         param_value_size_ret);
 /* Returns the Gen device ID */
 extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);
 
index 73db606..e8fa362 100644 (file)
@@ -35,6 +35,7 @@
 .native_vector_width_float = 16,
 .native_vector_width_double = 16,
 .native_vector_width_half = 16,
+.preferred_wg_sz_mul = 16,
 .address_bits = 32,
 .max_mem_alloc_size = 128 * 1024 * 1024,
 .image_support = CL_TRUE,
index e191058..acb7206 100644 (file)
@@ -56,6 +56,7 @@ struct _cl_kernel {
   size_t sampler_sz;          /* sampler size defined in kernel & kernel args. */
   struct ImageInfo *images;   /* images defined in kernel args */
   size_t image_sz;            /* image count in kernel args */
+  cl_ulong local_mem_sz;      /* local memory size specified in kernel args. */
   cl_argument *args;          /* To track argument setting */
   uint32_t arg_n:31;          /* Number of arguments */
   uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */