From: bsegovia Date: Tue, 18 Oct 2011 00:59:41 +0000 (+0000) Subject: Added support __constant samplers X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7494443c80651f7bb3e50a3ccbfd4dd11e6d44a7;p=contrib%2Fbeignet.git Added support __constant samplers --- diff --git a/kernels/test_copy_image.cl b/kernels/test_copy_image.cl index 3d6fb06..921b138 100644 --- a/kernels/test_copy_image.cl +++ b/kernels/test_copy_image.cl @@ -2,13 +2,11 @@ __constant sampler_t s = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; __kernel void -test_copy_image(__read_only image2d_t src, - __global uchar4 *dst) +test_copy_image(__read_only image2d_t src, __global uchar4 *dst) { const int x = (int) get_global_id(0); const int y = (int) get_global_id(1); const int id = x + y * get_image_width(src); - //const int id = x + y * 32; const uchar4 from = convert_uchar4(read_imageui(src, s, (int2)(x,y))); dst[id] = from; } diff --git a/src/cl_command_queue_gen6.c b/src/cl_command_queue_gen6.c index c5547e4..e811453 100644 --- a/src/cl_command_queue_gen6.c +++ b/src/cl_command_queue_gen6.c @@ -143,11 +143,7 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue, /* Create the constant buffer */ if (cst_sz > 0) { assert(ker->cst_buffer); - curbe = cl_kernel_create_cst_buffer(ker, - global_wk_off, - global_wk_sz, - local_wk_sz, - 0, 0); + curbe = cl_kernel_create_cst_buffer(ker, global_wk_off, global_wk_sz, local_wk_sz, 0, 0); } /* Only if we want to monitor performance for this kernel */ @@ -166,6 +162,12 @@ cl_command_queue_ND_range_gen6(cl_command_queue queue, &private_bo, &scratch_bo, header.local_mem_sz); + + /* Upload the __constant samplers if any */ + const void *samplers = ker->dynamic_heap + ker->patch.sampler_state.offset; + const uint32_t sampler_n = ker->patch.sampler_state.count; + gpgpu_upload_samplers(gpgpu, samplers, sampler_n); + gpgpu_states_setup(gpgpu, kernels, 16); /* Fill the constant buffer */ diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 7a08de8..8bdb52a 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -42,11 +42,6 @@ #define MO_RETAIN_BIT (1 << 28) #define SAMPLER_STATE_SIZE (16) -/* No dependency on Gen specific structures */ -struct opaque_sampler_state { - char opaque[SAMPLER_STATE_SIZE]; -}; - /* Stores both binding tables and surface states */ typedef struct surface_heap { uint32_t binding_table[256]; @@ -75,9 +70,6 @@ struct intel_gpgpu struct { dri_bo *bo; } sampler_state_b; struct { dri_bo *bo; } perf_b; - /* we will just copy them into the bo */ - struct opaque_sampler_state samplers[MAX_SAMPLERS]; - struct { uint32_t num_cs_entries; uint32_t size_cs_entry; /* size of one entry in 512bit elements */ @@ -86,9 +78,6 @@ struct intel_gpgpu uint32_t max_threads; /* max threads requested by the user */ }; -/* Be sure that the size is still valid */ -STATIC_ASSERT(sizeof(struct opaque_sampler_state) == 16);//sizeof(struct i965_sampler_state)); - LOCAL intel_gpgpu_t* intel_gpgpu_new(intel_driver_t *drv) { @@ -258,51 +247,51 @@ static const uint32_t gpgpu_l3_config_reg2[] = enum INSTRUCTION_PIPELINE { - PIPE_COMMON = 0x0, - PIPE_SINGLE_DWORD = 0x1, - PIPE_COMMON_CTG = 0x1, - PIPE_MEDIA = 0x2, - PIPE_3D = 0x3 + PIPE_COMMON = 0x0, + PIPE_SINGLE_DWORD = 0x1, + PIPE_COMMON_CTG = 0x1, + PIPE_MEDIA = 0x2, + PIPE_3D = 0x3 }; enum GFX_OPCODE { - GFXOP_PIPELINED = 0x0, - GFXOP_NONPIPELINED = 0x1, - GFXOP_3DPRIMITIVE = 0x3 + GFXOP_PIPELINED = 0x0, + GFXOP_NONPIPELINED = 0x1, + GFXOP_3DPRIMITIVE = 0x3 }; enum INSTRUCTION_TYPE { - INSTRUCTION_MI = 0x0, - INSTRUCTION_TRUSTED = 0x1, - INSTRUCTION_2D = 0x2, - INSTRUCTION_GFX = 0x3 + INSTRUCTION_MI = 0x0, + INSTRUCTION_TRUSTED = 0x1, + INSTRUCTION_2D = 0x2, + INSTRUCTION_GFX = 0x3 }; enum GFX3DCONTROL_SUBOPCODE { - GFX3DSUBOP_3DCONTROL = 0x00 + GFX3DSUBOP_3DCONTROL = 0x00 }; enum GFX3D_OPCODE { - GFX3DOP_3DSTATE_PIPELINED = 0x0, - GFX3DOP_3DSTATE_NONPIPELINED = 0x1, - GFX3DOP_3DCONTROL = 0x2, - GFX3DOP_3DPRIMITIVE = 0x3 + GFX3DOP_3DSTATE_PIPELINED = 0x0, + GFX3DOP_3DSTATE_NONPIPELINED = 0x1, + GFX3DOP_3DCONTROL = 0x2, + GFX3DOP_3DPRIMITIVE = 0x3 }; enum GFX3DSTATE_PIPELINED_SUBOPCODE { - GFX3DSUBOP_3DSTATE_PIPELINED_POINTERS = 0x00, - GFX3DSUBOP_3DSTATE_BINDING_TABLE_POINTERS = 0x01, - GFX3DSUBOP_3DSTATE_STATE_POINTER_INVALIDATE = 0x02, - GFX3DSUBOP_3DSTATE_VERTEX_BUFFERS = 0x08, - GFX3DSUBOP_3DSTATE_VERTEX_ELEMENTS = 0x09, - GFX3DSUBOP_3DSTATE_INDEX_BUFFER = 0x0A, - GFX3DSUBOP_3DSTATE_VF_STATISTICS = 0x0B, - GFX3DSUBOP_3DSTATE_CC_STATE_POINTERS = 0x0E + GFX3DSUBOP_3DSTATE_PIPELINED_POINTERS = 0x00, + GFX3DSUBOP_3DSTATE_BINDING_TABLE_POINTERS = 0x01, + GFX3DSUBOP_3DSTATE_STATE_POINTER_INVALIDATE = 0x02, + GFX3DSUBOP_3DSTATE_VERTEX_BUFFERS = 0x08, + GFX3DSUBOP_3DSTATE_VERTEX_ELEMENTS = 0x09, + GFX3DSUBOP_3DSTATE_INDEX_BUFFER = 0x0A, + GFX3DSUBOP_3DSTATE_VF_STATISTICS = 0x0B, + GFX3DSUBOP_3DSTATE_CC_STATE_POINTERS = 0x0E }; static void @@ -463,11 +452,12 @@ gpgpu_state_init(intel_gpgpu_t *state, dri_bo_unreference(state->sampler_state_b.bo); bo = dri_bo_alloc(state->drv->bufmgr, "sample states", - MAX_SAMPLERS * sizeof(struct gen6_sampler_state), + MAX_SAMPLERS * sizeof(gen6_sampler_state_t), 32); assert(bo); + dri_bo_map(bo, 1); + memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * MAX_SAMPLERS); state->sampler_state_b.bo = bo; - memset(state->samplers, 0, sizeof(state->samplers)); } static void @@ -621,15 +611,6 @@ gpgpu_bind_image2D(intel_gpgpu_t *state, } static void -gpgpu_build_sampler_table(intel_gpgpu_t *state) -{ - dri_bo_subdata(state->sampler_state_b.bo, - 0, - sizeof(state->samplers), - state->samplers); -} - -static void gpgpu_build_idrt(intel_gpgpu_t *state, genx_gpgpu_kernel_t *kernel, uint32_t ker_n) @@ -693,7 +674,7 @@ gpgpu_build_idrt(intel_gpgpu_t *state, } LOCAL void -gpgpu_upload_constants(intel_gpgpu_t *state, void* data, uint32_t size) +gpgpu_upload_constants(intel_gpgpu_t *state, const void* data, uint32_t size) { unsigned char *constant_buffer = NULL; @@ -705,12 +686,22 @@ gpgpu_upload_constants(intel_gpgpu_t *state, void* data, uint32_t size) } LOCAL void +gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n) +{ + if (n) { + /*sizeof(gen6_sampler_state_t) == sizeof(gen7_surface_state_t) */ + const size_t sz = n * sizeof(gen6_sampler_state_t); + memcpy(state->sampler_state_b.bo->virtual, data, sz); + } +} + +LOCAL void gpgpu_states_setup(intel_gpgpu_t *state, genx_gpgpu_kernel_t *kernel, uint32_t ker_n) { state->ker = kernel; - gpgpu_build_sampler_table(state); gpgpu_build_idrt(state, kernel, ker_n); dri_bo_unmap(state->surface_heap_b.bo); + dri_bo_unmap(state->sampler_state_b.bo); } LOCAL void diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h index 2b7b26a..13c9d27 100644 --- a/src/intel/intel_gpgpu.h +++ b/src/intel/intel_gpgpu.h @@ -91,7 +91,7 @@ extern void gpgpu_state_init(intel_gpgpu_t*, uint32_t max_threads, uint32_t size extern void gpgpu_set_perf_counters(intel_gpgpu_t*, struct _drm_intel_bo *perf); /* Fills current constant buffer with data */ -extern void gpgpu_upload_constants(intel_gpgpu_t*, void* data, uint32_t size); +extern void gpgpu_upload_constants(intel_gpgpu_t*, const void* data, uint32_t size); /* Setup all indirect states */ extern void gpgpu_states_setup(intel_gpgpu_t*, genx_gpgpu_kernel_t* kernel, uint32_t ker_n); @@ -99,7 +99,10 @@ extern void gpgpu_states_setup(intel_gpgpu_t*, genx_gpgpu_kernel_t* kernel, uint /* Make HW threads use barrierID */ extern void gpgpu_update_barrier(intel_gpgpu_t*, uint32_t barrierID, uint32_t thread_n); -/* Set a sampler (TODO: add other sampler fields) */ +/* Upload the constant samplers as specified inside the OCL kernel */ +extern void gpgpu_upload_samplers(intel_gpgpu_t *state, const void *data, uint32_t n); + +/* Set a sampler */ extern void gpgpu_set_sampler(intel_gpgpu_t*, uint32_t index, uint32_t non_normalized); /* Allocate the batch buffer and return the BO used for the batch buffer */ diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index 23bfc59..0301692 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -404,6 +404,8 @@ typedef struct gen7_sampler_state } ss3; } gen7_sampler_state_t; +STATIC_ASSERT(sizeof(gen6_sampler_state_t) == sizeof(gen7_sampler_state_t)); + #undef BITFIELD_BIT #undef BITFIELD_RANGE