void * param_value,
size_t * param_value_size_ret)
{
- return cl_get_kernel_workgroup_info(device,
+ return cl_get_kernel_workgroup_info(kernel,
+ device,
param_name,
param_value_size,
param_value,
/* Handle the various offsets to SLM */
const int32_t arg_n = gbe_kernel_get_arg_num(ker->opaque);
int32_t arg, slm_offset = gbe_kernel_get_slm_size(ker->opaque);
+ ker->local_mem_sz = 0;
for (arg = 0; arg < arg_n; ++arg) {
const enum gbe_arg_type type = gbe_kernel_get_arg_type(ker->opaque, arg);
if (type != GBE_ARG_LOCAL_PTR)
uint32_t *slmptr = (uint32_t *) (ker->curbe + offset);
*slmptr = slm_offset;
slm_offset += ker->args[arg].local_sz;
+ ker->local_mem_sz += ker->args[arg].local_sz;
}
-
return slm_offset;
}
sizeof(((cl_device_id)NULL)->FIELD)); \
return CL_SUCCESS;
+#include "cl_kernel.h"
LOCAL cl_int
-cl_get_kernel_workgroup_info(cl_device_id device,
+cl_get_kernel_workgroup_info(cl_kernel kernel,
+ cl_device_id device,
cl_kernel_work_group_info param_name,
size_t param_value_size,
void* param_value,
switch (param_name) {
DECL_FIELD(WORK_GROUP_SIZE, wg_sz)
DECL_FIELD(COMPILE_WORK_GROUP_SIZE, compile_wg_sz)
+ DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, preferred_wg_sz_mul)
+ case CL_KERNEL_LOCAL_MEM_SIZE:
+ /* Reported value: the size returned by gbe_kernel_get_slm_size() for
+  * the compiled kernel plus kernel->local_mem_sz, the running total of
+  * the local-pointer argument sizes.
+  *
+  * A NULL param_value is ignored, so a size-only query succeeds and
+  * only param_value_size_ret is written. The buffer-size check applies
+  * only when the caller actually supplied a buffer. */
+ if (param_value != NULL && param_value_size < sizeof(cl_ulong))
+ return CL_INVALID_VALUE;
+ if (param_value_size_ret != NULL)
+ *param_value_size_ret = sizeof(cl_ulong);
+ if (param_value != NULL)
+ *(cl_ulong*)param_value = gbe_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz;
+ return CL_SUCCESS;
default: return CL_INVALID_VALUE;
};
}
/* Kernel specific info that we're assigning statically */
size_t wg_sz;
size_t compile_wg_sz[3];
+ size_t preferred_wg_sz_mul;
};
/* Get a device from the given platform */
void * param_value,
size_t * param_value_size_ret);
-extern cl_int cl_get_kernel_workgroup_info(cl_device_id device,
- cl_kernel_work_group_info param_name,
- size_t param_value_size,
- void * param_value,
- size_t * param_value_size_ret);
+extern cl_int cl_get_kernel_workgroup_info(cl_kernel kernel,
+ cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
/* Returns the Gen device ID */
extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);
.native_vector_width_float = 16,
.native_vector_width_double = 16,
.native_vector_width_half = 16,
+.preferred_wg_sz_mul = 16,
.address_bits = 32,
.max_mem_alloc_size = 128 * 1024 * 1024,
.image_support = CL_TRUE,
size_t sampler_sz; /* sampler size defined in kernel & kernel args. */
struct ImageInfo *images; /* images defined in kernel args */
size_t image_sz; /* image count in kernel args */
+ cl_ulong local_mem_sz; /* local memory size specified in kernel args. */
cl_argument *args; /* To track argument setting */
uint32_t arg_n:31; /* Number of arguments */
uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */