Fix a bug in stack calculation.
author Ruiling Song <ruiling.song@intel.com>
Mon, 5 Aug 2013 07:14:39 +0000 (15:14 +0800)
committer Zhigang Gong <zhigang.gong@linux.intel.com>
Tue, 6 Aug 2013 07:09:58 +0000 (15:09 +0800)
1. The thread_id is located in r0.5[0-8], so we need to extract exactly those bits (mask with 0x1ff).
2. Also, we don't need such a large stack: max_compute_unit is already
   treated as #EU * max_thread_per_eu, so multiplying by the per-unit
   thread count again is redundant.

Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Xing, Homer <homer.xing@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
backend/src/backend/gen_context.cpp
src/cl_command_queue_gen7.c

index dd8e1e0..ce6fe35 100644 (file)
@@ -118,7 +118,7 @@ namespace gbe
     p->push();
       p->curr.execWidth = 1;
       p->curr.predicate = GEN_PREDICATE_NONE;
-      p->SHR(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(10));
+      p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff));
       p->curr.execWidth = this->simdWidth;
       p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
       p->curr.execWidth = 1;
index 048595c..8933213 100644 (file)
@@ -180,7 +180,6 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
   assert(offset >= 0);
   stack_sz *= gbe_kernel_get_simd_width(ker->opaque);
   stack_sz *= device->max_compute_unit;
-  stack_sz *= device->max_thread_per_unit;
   cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3);
 }