HSW: Fix a potential issue on GT3 when calculating the stack address.

author Yang Rong <rong.r.yang@intel.com>

Thu, 12 Jun 2014 11:42:12 +0000 (19:42 +0800)

committer Zhigang Gong <zhigang.gong@intel.com>

Thu, 12 Jun 2014 05:56:31 +0000 (13:56 +0800)
author Yang Rong <rong.r.yang@intel.com>
Thu, 12 Jun 2014 11:42:12 +0000 (19:42 +0800)
committer Zhigang Gong <zhigang.gong@intel.com>
Thu, 12 Jun 2014 05:56:31 +0000 (13:56 +0800)
diff --git a/backend/src/backend/gen75_context.cpp b/backend/src/backend/gen75_context.cpp

index aedd4d3..da0db85 100644 (file)
--- a/backend/src/backend/gen75_context.cpp
+++ b/backend/src/backend/gen75_context.cpp
@@ -92,12 +92,12 @@ namespace gbe
        p->curr.predicate = GEN_PREDICATE_NONE;
        //p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff));
        p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x7f));
-      p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), GenRegister::immud(0x80));
+      p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), GenRegister::immud(0x180));
        p->SHR(GenRegister::ud1grf(126,4), GenRegister::ud1grf(126, 4), GenRegister::immud(7));
        p->curr.execWidth = this->simdWidth;
        p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
        p->curr.execWidth = 1;
-      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(1));
+      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(2));
        p->ADD(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::ud1grf(126, 4));
        p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
        p->curr.execWidth = this->simdWidth;
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c

index 2223f4f..978650a 100644 (file)
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -244,6 +244,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
    assert(offset >= 0);
    stack_sz *= interp_kernel_get_simd_width(ker->opaque);
    stack_sz *= device->max_compute_unit;
+  /* On HSW the per-thread stack offset is relative to the half slice, so uneven
+     thread scheduling across half slices can push it out of bounds. The maximum
+     number of half slices is 4 (in GT4), so multiply the stack size by 4 to be safe.
+   */
+  if(cl_driver_get_ver(ctx->drv) == 75)
+    stack_sz *= 4;
+
    cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cl_gpgpu_get_cache_ctrl());
  }
  
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c

index 5093583..cae843b 100644 (file)
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -833,6 +833,9 @@ intel_gpgpu_set_scratch(intel_gpgpu_t * gpgpu, uint32_t per_thread_size)
    drm_intel_bufmgr *bufmgr = gpgpu->drv->bufmgr;
    drm_intel_bo* old = gpgpu->scratch_b.bo;
    uint32_t total = per_thread_size * gpgpu->max_threads;
+  /* Per Bspec, scratch should be 2X the desired size, otherwise luxmark may hang */
+  if (IS_HASWELL(gpgpu->drv->device_id))
+      total *= 2;
  
    gpgpu->per_thread_scratch = per_thread_size;
author	Yang Rong <rong.r.yang@intel.com>
	Thu, 12 Jun 2014 11:42:12 +0000 (19:42 +0800)
committer	Zhigang Gong <zhigang.gong@intel.com>
	Thu, 12 Jun 2014 05:56:31 +0000 (13:56 +0800)
backend/src/backend/gen75_context.cpp		patch \| blob \| history
src/cl_command_queue_gen7.c		patch \| blob \| history
src/intel/intel_gpgpu.c		patch \| blob \| history