From 35936076f07b8ebc7f5b90e54b345ace01150be2 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 21 Feb 2014 12:50:55 +0800 Subject: [PATCH] GBE: optimize read_image to avoid getting sampler info dynamically. Most of the time, the user uses a const sampler value in the kernel directly. Thus we don't need to get the sampler value through a function call. This way, the compiler front end can optimize much better than when the sampler information is fetched dynamically. For luxmark's median/simple case, this patch gives about a 30-45% performance gain. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- backend/src/ocl_stdlib.tmpl.h | 13 ++++++------- src/cl_kernel.c | 3 +++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 307f562..cde9cfb 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -4521,23 +4521,21 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler); #define DECL_READ_IMAGE(float_coord_rounding_fix, int_clamping_fix, \ image_type, type, suffix, coord_type) \ INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \ - sampler_t sampler, \ + const sampler_t sampler, \ coord_type coord) \ { \ GET_IMAGE(cl_image, surface_id); \ coord_type tmpCoord = coord; \ - ushort samplerValue; \ if (float_coord_rounding_fix | int_clamping_fix) { \ - samplerValue = __gen_ocl_get_sampler_info(sampler); \ - if (((samplerValue & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \ - && ((samplerValue & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \ + if (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) \ + && ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST)) { \ if (float_coord_rounding_fix \ - && ((samplerValue & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \ + && ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0)) { \ FIXUP_FLOAT_COORD(tmpCoord); \ } \ if (int_clamping_fix) { \ if (OUT_OF_BOX(tmpCoord, surface_id, \ - (samplerValue & 
CLK_NORMALIZED_COORDS_TRUE))) { \ + (sampler & CLK_NORMALIZED_COORDS_TRUE))) { \ unsigned int border_alpha; \ int order = __gen_ocl_get_image_channel_order(surface_id); \ if (!CLK_HAS_ALPHA(order)) { \ @@ -4555,6 +4553,7 @@ ushort __gen_ocl_get_sampler_info(sampler_t sampler); EXPEND_READ_COORD(surface_id, sampler, tmpCoord), 0);\ } + #define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type) \ INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, \ coord_type coord) \ diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 6a0c8e6..ecb5e95 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -172,6 +172,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value) k->args[index].mem = NULL; k->args[index].sampler = sampler; cl_set_sampler_arg_slot(k, index, sampler); + offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index); + assert(offset + 2 <= k->curbe_sz); + memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2); return CL_SUCCESS; } -- 2.7.4