From 92949008963697d313e61090978a28a60139a3c1 Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Mon, 29 Sep 2014 13:38:37 +0800 Subject: [PATCH] BDW: Correct scratch buffer of BDW. BDW's scratch buffer size is now aligned to a power of 2, starting from 1024. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong Reviewed-by: Junyan He --- backend/src/backend/gen8_context.cpp | 2 +- src/intel/intel_gpgpu.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 663ee52..fbd4f9c 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -46,7 +46,7 @@ namespace gbe uint32_t Gen8Context::alignScratchSize(uint32_t size){ if(size == 0) return 0; - uint32_t i = 2048; + uint32_t i = 1024; while(i < size) i *= 2; return i; } diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index fa7333e..d65b1a2 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -342,16 +342,28 @@ uint32_t intel_gpgpu_get_scratch_index_gen7(uint32_t size) { } uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) { + //align in backend, if non pow2, must align when alloc scratch bo. + assert((size & (size - 1)) == 0); size = size >> 11; uint32_t index = 0; while((size >>= 1) > 0) index++; //get leading one - //non pow 2 size - if(size & (size - 1)) index++; return index; } +uint32_t intel_gpgpu_get_scratch_index_gen8(uint32_t size) { + //align in backend, if non pow2, must align when alloc scratch bo. 
+ assert((size & (size - 1)) == 0); + size = size >> 10; + uint32_t index = 0; + while((size >>= 1) > 0) + index++; //get leading one + + return index; +} + + static cl_int intel_gpgpu_get_max_curbe_size(uint32_t device_id) { @@ -1142,7 +1154,9 @@ intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) /* group_threads_num should not be set to 0 even if the barrier is disabled per bspec */ desc->desc6.group_threads_num = kernel->thread_n; desc->desc6.barrier_enable = kernel->use_slm; - if (slm_sz <= 4*KB) + if (slm_sz == 0) + slm_sz = 0; + else if (slm_sz <= 4*KB) slm_sz = 4*KB; else if (slm_sz <= 8*KB) slm_sz = 8*KB; @@ -1666,7 +1680,7 @@ intel_set_gpgpu_callbacks(int device_id) cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8; - intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75; + intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; intel_gpgpu_post_action = intel_gpgpu_post_action_gen75; intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb intel_gpgpu_set_base_address = intel_gpgpu_set_base_address_gen8; -- 2.7.4