From: Zhigang Gong Date: Sun, 6 Jan 2013 05:16:49 +0000 (+0800) Subject: Implement sampler support. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0a5e96e997ee799bd219837604e7bd64a3d76b05;p=contrib%2Fbeignet.git Implement sampler support. Accept sampler kernel argument and pass the sampler to driver side. At driver side, gen7 driver will allocate a sampler slot and initialize one sampler state according to the cl sampler state, and then pass back the slot index to the curbe array. After this commit, read_imagei(image, sampler, coord) could work as expected. Signed-off-by: Zhigang Gong Reviewed-by: Lu Guanqun --- diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 943a332..d31f58a 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -107,7 +107,8 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) arg_type = gbe_kernel_get_arg_type(k->opaque, i); if (arg_type != GBE_ARG_GLOBAL_PTR && arg_type != GBE_ARG_CONSTANT_PTR && - arg_type != GBE_ARG_IMAGE) + arg_type != GBE_ARG_IMAGE && + arg_type != GBE_ARG_SAMPLER) continue; offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); if (arg_type == GBE_ARG_IMAGE) { @@ -116,6 +117,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) k->args[i].mem->intel_fmt, k->args[i].mem->w, k->args[i].mem->h, k->args[i].mem->pitch, k->args[i].mem->tiling); + } else if (arg_type == GBE_ARG_SAMPLER) { + uint32_t *curbe_index = (uint32_t*)(k->curbe + offset); + cl_gpgpu_insert_sampler(queue->gpgpu, curbe_index, k->args[i].sampler); } else cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3); } diff --git a/src/cl_driver.h b/src/cl_driver.h index 8c86f32..43719a9 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -47,6 +47,7 @@ typedef struct _cl_driver *cl_driver; typedef struct _cl_gpgpu *cl_gpgpu; typedef struct _cl_context_prop *cl_context_prop; +typedef struct _cl_sampler *cl_sampler; 
/************************************************************************** * Driver @@ -109,6 +110,10 @@ extern cl_gpgpu_delete_cb *cl_gpgpu_delete; typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint); extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf; +/* Insert a sampler */ +typedef void (cl_gpgpu_insert_sampler_cb)(cl_gpgpu, uint32_t *curbe_index, cl_sampler sampler); +extern cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler; + /* Set a 2d texture */ typedef void (cl_gpgpu_bind_image2D_cb)(cl_gpgpu state, uint32_t *curbe_index, diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index a0d0d8c..a9c1e22 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -57,4 +57,5 @@ LOCAL cl_gpgpu_batch_start_cb *cl_gpgpu_batch_start = NULL; LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL; LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL; LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL; +LOCAL cl_gpgpu_insert_sampler_cb *cl_gpgpu_insert_sampler = NULL; diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 831d36b..356a8a7 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -25,6 +25,7 @@ #include "cl_alloc.h" #include "cl_utils.h" #include "CL/cl.h" +#include "cl_sampler.h" #include #include @@ -126,6 +127,22 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) return CL_SUCCESS; } + /* For a sampler*/ + if (arg_type == GBE_ARG_SAMPLER) { + cl_sampler sampler; + if (UNLIKELY(value == NULL)) + return CL_INVALID_KERNEL_ARGS; + sampler = *(cl_sampler*)value; + + if (UNLIKELY(sampler->magic != CL_MAGIC_SAMPLER_HEADER)) + return CL_INVALID_ARG_VALUE; + k->args[index].local_sz = 0; + k->args[index].is_set = 1; + k->args[index].mem = NULL; + k->args[index].sampler = sampler; + return CL_SUCCESS; + } + /* Otherwise, we just need to check that this is a buffer */ if (UNLIKELY(value == NULL)) return CL_INVALID_KERNEL_ARGS; diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 0347d0a..e444f3b 100644 
--- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -36,6 +36,7 @@ struct _gbe_kernel; */ typedef struct cl_argument { cl_mem mem; /* For image and regular buffers */ + cl_sampler sampler; /* For sampler. */ uint32_t local_sz:31; /* For __local size specification */ uint32_t is_set:1; /* All args must be set before NDRange */ } cl_argument; diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index fbf4619..19bdbed 100644 --- a/src/intel/intel_defines.h +++ b/src/intel/intel_defines.h @@ -301,5 +301,27 @@ #define GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL 0x2 #define GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL 0x0 +#define GEN_MAPFILTER_NEAREST 0x0 +#define GEN_MAPFILTER_LINEAR 0x1 +#define GEN_MAPFILTER_ANISOTROPIC 0x2 + +#define GEN_MIPFILTER_NONE 0 +#define GEN_MIPFILTER_NEAREST 1 +#define GEN_MIPFILTER_LINEAR 3 + +#define GEN_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20 +#define GEN_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10 +#define GEN_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08 +#define GEN_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04 +#define GEN_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02 +#define GEN_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01 + +#define GEN_TEXCOORDMODE_WRAP 0 +#define GEN_TEXCOORDMODE_MIRROR 1 +#define GEN_TEXCOORDMODE_CLAMP 2 +#define GEN_TEXCOORDMODE_CUBE 3 +#define GEN_TEXCOORDMODE_CLAMP_BORDER 4 +#define GEN_TEXCOORDMODE_MIRROR_ONCE 5 + #endif /* __GENX_DEFINES_H__ */ diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 119bf31..c035076 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -37,6 +37,7 @@ #include "cl_alloc.h" #include "cl_utils.h" +#include "cl_sampler.h" #define GEN_CMD_MEDIA_OBJECT (0x71000000) #define MO_TS_BIT (1 << 24) @@ -56,6 +57,8 @@ enum { max_buf_n = 128 }; enum { max_img_n = 32 }; +enum {max_sampler_n = 16 }; + /* Handle GPGPU state */ struct intel_gpgpu { @@ -70,6 +73,8 @@ struct intel_gpgpu unsigned int img_index_base; /* base index for image surface.*/ drm_intel_bo *binded_img[max_img_n]; /* all images binded for the call */ + 
unsigned int sampler_bitmap; /* sampler usage bitmap. */ + struct { drm_intel_bo *bo; } stack_b; struct { drm_intel_bo *bo; } idrt_b; struct { drm_intel_bo *bo; } surface_heap_b; @@ -347,6 +352,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu, gpgpu->binded_n = 0; gpgpu->img_bitmap = 0; gpgpu->img_index_base = 2; + gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1); /* URB */ gpgpu->urb.num_cs_entries = 64; @@ -481,6 +487,16 @@ intel_gpgpu_get_free_img_index(intel_gpgpu_t *gpgpu) return slot + gpgpu->img_index_base; } +static int +intel_gpgpu_get_free_sampler_index(intel_gpgpu_t *gpgpu) +{ + int slot; + assert(~gpgpu->sampler_bitmap != 0); + slot = __fls(~gpgpu->sampler_bitmap); + gpgpu->sampler_bitmap |= (1 << (31 - slot)); + return slot - max_sampler_n; +} + static void intel_gpgpu_bind_image2D_gen7(intel_gpgpu_t *gpgpu, uint32_t *curbe_index, @@ -638,6 +654,88 @@ intel_gpgpu_upload_samplers(intel_gpgpu_t *gpgpu, const void *data, uint32_t n) } } +int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest) +{ + switch( cl_address_mode ) { + case CL_ADDRESS_NONE: + case CL_ADDRESS_REPEAT: + return GEN_TEXCOORDMODE_WRAP; + case CL_ADDRESS_CLAMP: + /* GL_CLAMP is the weird mode where coordinates are clamped to + * [0.0, 1.0], so linear filtering of coordinates outside of + * [0.0, 1.0] give you half edge texel value and half border + * color. The fragment shader will clamp the coordinates, and + * we set clamp_border here, which gets the result desired. We + * just use clamp(_to_edge) for nearest, because for nearest + * clamping to 1.0 gives border color instead of the desired + * edge texels. 
+ */ + if (using_nearest) + return GEN_TEXCOORDMODE_CLAMP; + else + return GEN_TEXCOORDMODE_CLAMP_BORDER; + case CL_ADDRESS_CLAMP_TO_EDGE: + return GEN_TEXCOORDMODE_CLAMP; + case CL_ADDRESS_MIRRORED_REPEAT: + return GEN_TEXCOORDMODE_MIRROR; + default: + return GEN_TEXCOORDMODE_WRAP; + } +} + +static void +intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t *curbe_index, cl_sampler cl_sampler) +{ + int index; + int using_nearest = 0; + uint32_t wrap_mode; + gen7_sampler_state_t *sampler; + + index = intel_gpgpu_get_free_sampler_index(gpgpu); + sampler = (gen7_sampler_state_t *)gpgpu->sampler_state_b.bo->virtual + index; + if (!cl_sampler->normalized_coords) + sampler->ss3.non_normalized_coord = 1; + else + sampler->ss3.non_normalized_coord = 0; + + switch (cl_sampler->filter) { + case CL_FILTER_NEAREST: + sampler->ss0.min_filter = GEN_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; + sampler->ss0.mag_filter = GEN_MAPFILTER_NEAREST; + using_nearest = 1; + break; + case CL_FILTER_LINEAR: + sampler->ss0.min_filter = GEN_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = GEN_MIPFILTER_NONE; + sampler->ss0.mag_filter = GEN_MAPFILTER_LINEAR; + break; + } + + wrap_mode = translate_wrap_mode(cl_sampler->address, using_nearest); + sampler->ss3.r_wrap_mode = wrap_mode; + sampler->ss3.s_wrap_mode = wrap_mode; + sampler->ss3.t_wrap_mode = wrap_mode; + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + sampler->ss0.base_level = 0; + + sampler->ss1.max_lod = 0; + sampler->ss1.min_lod = 0; + + if (sampler->ss0.min_filter != GEN_MAPFILTER_NEAREST) + sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MIN | + GEN_ADDRESS_ROUNDING_ENABLE_V_MIN | + GEN_ADDRESS_ROUNDING_ENABLE_R_MIN; + if (sampler->ss0.mag_filter != GEN_MAPFILTER_NEAREST) + sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG | + GEN_ADDRESS_ROUNDING_ENABLE_V_MAG | + GEN_ADDRESS_ROUNDING_ENABLE_R_MAG; + *curbe_index 
= index; +} + static void intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel) { @@ -712,5 +810,6 @@ intel_set_gpgpu_callbacks(void) cl_gpgpu_batch_end = (cl_gpgpu_batch_end_cb *) intel_gpgpu_batch_end; cl_gpgpu_flush = (cl_gpgpu_flush_cb *) intel_gpgpu_flush; cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker; + cl_gpgpu_insert_sampler = (cl_gpgpu_insert_sampler_cb *) intel_gpgpu_insert_sampler; }