From: Zhigang Gong Date: Fri, 14 Feb 2014 08:11:36 +0000 (+0800) Subject: CL: make the scratch size as a device resource attribute. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9bcd8b0fb9a0cecbba06a33315c5b076ffb54f89;p=contrib%2Fbeignet.git CL: make the scratch size as a device resource attribute. Actually, the scratch size is much like the local memory size, which should be device-dependent information. This patch puts the scratch memory size into the device attribute structure. When the kernel needs more than the maximum scratch memory, we now return an out-of-resources error (CL_OUT_OF_RESOURCES) rather than triggering an assertion. Signed-off-by: Zhigang Gong Reviewed-by: Song, Ruiling --- diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 83fe21d..348211c 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -255,15 +255,6 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker) cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3); } -static void -cl_setup_scratch(cl_gpgpu gpgpu, cl_kernel ker) -{ - int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque); - /* Per HW Spec, it only allows 12KB scratch memory per HW thread now */ - assert(scratch_sz <= 12*1024); - cl_gpgpu_set_scratch(gpgpu, scratch_sz); -} - LOCAL cl_int cl_command_queue_ND_range_gen7(cl_command_queue queue, cl_kernel ker, @@ -279,6 +270,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, const uint32_t simd_sz = cl_kernel_get_simd_width(ker); size_t i, batch_sz = 0u, local_sz = 0u; size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque); + int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque); size_t thread_n = 0u; cl_int err = CL_SUCCESS; @@ -295,11 +287,17 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz; kernel.curbe_sz = cst_sz; + if (scratch_sz > ker->program->ctx->device->scratch_mem_size) { + fprintf(stderr, "Beignet: Out of scratch memory 
%d.\n", scratch_sz); + return CL_OUT_OF_RESOURCES; + } /* Curbe step 1: fill the constant urb buffer data shared by all threads */ if (ker->curbe) { kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n); - if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) + if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) { + fprintf(stderr, "Beignet: Out of shared local memory %d.\n", kernel.slm_sz); return CL_OUT_OF_RESOURCES; + } } /* Setup the kernel */ @@ -315,7 +313,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, /* Bind all samplers */ cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz); - cl_setup_scratch(gpgpu, ker); + cl_gpgpu_set_scratch(gpgpu, scratch_sz); + /* Bind a stack if needed */ cl_bind_stack(gpgpu, ker); diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 4ece26c..5f7c9fe 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -68,6 +68,7 @@ struct _cl_device_id { cl_uint max_constant_args; cl_device_local_mem_type local_mem_type; cl_ulong local_mem_size; + cl_ulong scratch_mem_size; cl_bool error_correction_support; cl_bool host_unified_memory; size_t profiling_timer_resolution; diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h index 7bf662e..682ee06 100644 --- a/src/cl_gen75_device.h +++ b/src/cl_gen75_device.h @@ -24,6 +24,7 @@ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, +.scratch_mem_size = 2 << 20, #include "cl_gt_device.h" diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h index e198d6f..69cc0b9 100644 --- a/src/cl_gen7_device.h +++ b/src/cl_gen7_device.h @@ -23,6 +23,7 @@ .global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10, +.scratch_mem_size = 12 << 10, #include "cl_gt_device.h"