From e89a9768d8cc597cfac2113027ec8d02cadf28d9 Mon Sep 17 00:00:00 2001 From: bsegovia Date: Tue, 24 Jan 2012 00:32:42 +0000 Subject: [PATCH] Fixed SNB build --- src/CMakeLists.txt | 2 ++ src/cl_command_queue_gen7.c | 1 + src/cl_kernel.h | 4 ++++ src/intel/intel_gpgpu.c | 24 +++++++++++++++++++----- src/intel/intel_structs.h | 2 +- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 067e99e..d7a10e3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,6 +64,7 @@ ADD_EXECUTABLE(dct tests/dct.c) ADD_EXECUTABLE(binomialOption tests/binomialOption.c) ADD_EXECUTABLE(nbody tests/nbody.c) ADD_EXECUTABLE(svm_test tests/svm_test.c) +ADD_EXECUTABLE(fast_mat_mul tests/fast_mat_mul.cpp) TARGET_LINK_LIBRARIES(test_copy_buffer cl_test m) TARGET_LINK_LIBRARIES(test_copy_image cl_test m) TARGET_LINK_LIBRARIES(test_enqueue_read cl_test m) @@ -88,4 +89,5 @@ TARGET_LINK_LIBRARIES(dct cl_test m) TARGET_LINK_LIBRARIES(binomialOption cl_test m) TARGET_LINK_LIBRARIES(nbody cl_test m) TARGET_LINK_LIBRARIES(svm_test cl_test m) +TARGET_LINK_LIBRARIES(fast_mat_mul cl_test m) diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 8c93b73..dce1c2c 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -118,6 +118,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, /* Check that the local work sizes are OK */ TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz); + //kernel.thread_n = thread_n = local_sz / simd_sz; kernel.thread_n = thread_n = local_sz / simd_sz; /* CURBE step 1. Allocate and fill fields shared by threads in workgroup */ diff --git a/src/cl_kernel.h b/src/cl_kernel.h index d11d551..c27cff6 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -288,7 +288,11 @@ struct _cl_kernel { }; /* Size of the surface state as encoded in the binary blob */ +#if USE_OLD_COMPILER +#define SURFACE_SZ 32 +#else #define SURFACE_SZ 64 +#endif /* Allocate an empty kernel */ extern cl_kernel cl_kernel_new(void); diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 753a9bd..10e20db 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -177,10 +177,19 @@ gpgpu_load_vfe_state(intel_gpgpu_t *state) vfe->vfe1.reset_gateway_timer = 1; vfe->vfe1.max_threads = state->max_threads - 1; vfe->vfe1.urb_entries = 64; - vfe->vfe3.curbe_size = 63; - vfe->vfe3.urbe_size = 13; - vfe->vfe4.scoreboard_mask = - (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 0 : 0x80000000; + if (state->drv->gen_ver >= 7) { + vfe->vfe3.curbe_size = 480; + vfe->vfe4.scoreboard_mask = 0; + } else { + vfe->vfe3.curbe_size = 63; + vfe->vfe3.urb_size = 13; + vfe->vfe4.scoreboard_mask = 0x80000000; + } + + //M + //p + //vfe->vfe3.urb_size = 13; + //vfe->vfe4.scoreboard_mask = (state->drv->gen_ver == 7 || state->drv->gen_ver == 75) ? 0 : 0x80000000; intel_batchbuffer_alloc_space(state->batch, sizeof(gen6_vfe_state_inline_t)); ADVANCE_BATCH(state->batch); } @@ -191,9 +200,14 @@ gpgpu_load_constant_buffer(intel_gpgpu_t *state) BEGIN_BATCH(state->batch, 4); OUT_BATCH(state->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */ OUT_BATCH(state->batch, 0); /* mbz */ +// XXX +#if 1 OUT_BATCH(state->batch, state->urb.size_cs_entry* state->urb.num_cs_entries*32); +#else + OUT_BATCH(state->batch, 5120); +#endif OUT_RELOC(state->batch, state->curbe_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(state->batch); } @@ -204,7 +218,7 @@ gpgpu_load_idrt(intel_gpgpu_t *state) BEGIN_BATCH(state->batch, 4); OUT_BATCH(state->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */ OUT_BATCH(state->batch, 0); /* mbz */ - OUT_BATCH(state->batch, state->idrt_b.num*32); + OUT_BATCH(state->batch, state->idrt_b.num << 5); OUT_RELOC(state->batch, state->idrt_b.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(state->batch); } diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index 0301692..84b5a17 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -234,7 +234,7 @@ typedef struct gen6_vfe_state_inline struct { uint32_t curbe_size:16; /* in GRFs */ - uint32_t urbe_size:16; /* in GRFs */ + uint32_t urb_size:16; /* in GRFs */ } vfe3; struct { -- 2.7.4