From 81a7569b8258771abf553378ee5768caaeb9d3f6 Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Mon, 23 Sep 2013 14:04:08 +0800 Subject: [PATCH] Remove global offset need divide by local size restriction. Set the global offset to 0 in the walker, and add the global offset in get_global_id. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong --- backend/src/ocl_stdlib.tmpl.h | 2 +- src/cl_api.c | 7 ------- src/intel/intel_gpgpu.c | 6 +++--- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 9b76ba1..4921ee4 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -588,7 +588,7 @@ DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1) #undef DECL_PUBLIC_WORK_ITEM_FN INLINE uint get_global_id(uint dim) { - return get_local_id(dim) + get_local_size(dim) * get_group_id(dim); + return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) + get_global_offset(dim); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/cl_api.c b/src/cl_api.c index e178eec..ded0e0c 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -2264,19 +2264,12 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, goto error; } - /* Check offset values. We add a non standard restriction. 
The offsets must - * also be evenly divided by the local sizes - */ if (global_work_offset != NULL) for (i = 0; i < work_dim; ++i) { if (UNLIKELY(~0LL - global_work_offset[i] > global_work_size[i])) { err = CL_INVALID_GLOBAL_OFFSET; goto error; } - if (UNLIKELY(local_work_size != NULL && global_work_offset[i] % local_work_size[i])) { - err = CL_INVALID_GLOBAL_OFFSET; - goto error; - } } /* Local sizes must be non-null and divide global sizes */ diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 7b82b76..44f44ef 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -886,11 +886,11 @@ intel_gpgpu_walker(intel_gpgpu_t *gpgpu, OUT_BATCH(gpgpu->batch, (1 << 30) | (thread_n-1)); /* SIMD16 | thread max */ else OUT_BATCH(gpgpu->batch, (0 << 30) | (thread_n-1)); /* SIMD8 | thread max */ - OUT_BATCH(gpgpu->batch, global_wk_off[0]); + OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[0]); - OUT_BATCH(gpgpu->batch, global_wk_off[1]); + OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[1]); - OUT_BATCH(gpgpu->batch, global_wk_off[2]); + OUT_BATCH(gpgpu->batch, 0); OUT_BATCH(gpgpu->batch, global_wk_dim[2]); OUT_BATCH(gpgpu->batch, right_mask); OUT_BATCH(gpgpu->batch, ~0x0); /* we always set height as 1, so set bottom mask as all 1*/ -- 2.7.4