From 716af9adf3b4d28919b8fe2925cf920759ab2140 Mon Sep 17 00:00:00 2001
From: bsegovia
Date: Sat, 15 Oct 2011 04:06:29 +0000
Subject: [PATCH] Finished first support for image2D. Right now, we only support linear formats

---
 src/cl_api.c            |  36 +++++++++++----
 src/cl_command_queue.c  |   3 +-
 src/cl_image.c          | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/cl_image.h          |  13 ++++++
 src/cl_mem.c            |  11 ++++-
 src/cl_mem.h            |   2 +
 src/cl_utils.h          |   3 ++
 src/intel/intel_gpgpu.c |   6 +--
 8 files changed, 177 insertions(+), 16 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index e6c7dfb..01fe8c8 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -24,6 +24,7 @@
 #include "cl_program.h"
 #include "cl_kernel.h"
 #include "cl_mem.h"
+#include "cl_image.h"
 #include "cl_alloc.h"
 #include "cl_utils.h"
 
@@ -302,15 +303,32 @@ error:
 }
 
 cl_int
-clGetSupportedImageFormats(cl_context context,
-                           cl_mem_flags flags,
-                           cl_mem_type image_type,
-                           cl_uint num_entries,
-                           cl_image_format * image_formats,
-                           cl_uint * num_image_formats)
+clGetSupportedImageFormats(cl_context ctx,
+                           cl_mem_flags flags,
+                           cl_mem_type image_type,
+                           cl_uint num_entries,
+                           cl_image_format *image_formats,
+                           cl_uint * num_image_formats)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (ctx);
+  if (UNLIKELY(num_entries == 0 && image_formats != NULL)) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  if (UNLIKELY(image_type != CL_MEM_OBJECT_IMAGE2D &&
+               image_type != CL_MEM_OBJECT_IMAGE3D)) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  err = cl_image_get_supported_fmt(ctx,
+                                   image_type,
+                                   num_entries,
+                                   image_formats,
+                                   num_image_formats);
+
+error:
+  return err;
 }
 
 cl_int
@@ -1061,7 +1079,7 @@ clEnqueueBarrier(cl_command_queue command_queue)
 }
 
 void*
-clGetExtensionFunctionAddress(const char * func_name)
+clGetExtensionFunctionAddress(const char *func_name)
 {
   NOT_IMPLEMENTED;
   return NULL;
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 459dd0b..fc23c06 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -151,8 +151,7 @@ cl_command_queue_bind_surface(cl_command_queue queue,
     bo = mem->bo;
     assert(bo);
     if (mem->is_image) {
-      const int fmt =0x0CB; // I965_SURFACEFORMAT_R8G8B8A8_UINT
-      gpgpu_bind_image2D(gpgpu, index, bo, fmt, mem->w, mem->h, 4, cc_llc_l3);
+      gpgpu_bind_image2D(gpgpu, index, bo, mem->intel_fmt, mem->w, mem->h, mem->bpp, cc_llc_l3);
       cl_kernel_copy_image_parameters(k, mem, index, curbe);
     } else
       gpgpu_bind_buf(gpgpu, index, bo, bo->size, cc_llc_l3);
diff --git a/src/cl_image.c b/src/cl_image.c
index 7e6e700..bba741d 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,6 +19,7 @@
 
 #include "cl_image.h"
 #include "cl_utils.h"
+#include "intel/intel_defines.h"
 
 #include <assert.h>
 
@@ -93,3 +94,121 @@ cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
   return CL_SUCCESS;
 }
 
+LOCAL uint32_t
+cl_image_get_intel_format(const cl_image_format *fmt)
+{
+  const uint32_t type = fmt->image_channel_data_type;
+  const uint32_t order = fmt->image_channel_order;
+  switch (order) {
+    case CL_R:
+    case CL_A:
+    case CL_INTENSITY:
+    case CL_LUMINANCE:
+      switch (type) {
+        case CL_HALF_FLOAT:     return I965_SURFACEFORMAT_R16_FLOAT;
+        case CL_FLOAT:          return I965_SURFACEFORMAT_R32_FLOAT;
+        case CL_SNORM_INT16:    return I965_SURFACEFORMAT_R16_SNORM;
+        case CL_SNORM_INT8:     return I965_SURFACEFORMAT_R8_SNORM;
+        case CL_UNORM_INT8:     return I965_SURFACEFORMAT_R8_UNORM;
+        case CL_UNORM_INT16:    return I965_SURFACEFORMAT_R16_UNORM;
+        case CL_SIGNED_INT8:    return I965_SURFACEFORMAT_R8_SINT;
+        case CL_SIGNED_INT16:   return I965_SURFACEFORMAT_R16_SINT;
+        case CL_SIGNED_INT32:   return I965_SURFACEFORMAT_R32_SINT;
+        case CL_UNSIGNED_INT8:  return I965_SURFACEFORMAT_R8_UINT;
+        case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16_UINT;
+        case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32_UINT;
+        default: return INTEL_UNSUPPORTED_FORMAT;
+      };
+    case CL_RG:
+    case CL_RA:
+    case CL_Rx:
+      switch (type) {
+        case CL_HALF_FLOAT:     return I965_SURFACEFORMAT_R16G16_FLOAT;
+        case CL_FLOAT:          return I965_SURFACEFORMAT_R32G32_FLOAT;
+        case CL_SNORM_INT16:    return I965_SURFACEFORMAT_R16G16_SNORM;
+        case CL_SNORM_INT8:     return I965_SURFACEFORMAT_R8G8_SNORM;
+        case CL_UNORM_INT8:     return I965_SURFACEFORMAT_R8G8_UNORM;
+        case CL_UNORM_INT16:    return I965_SURFACEFORMAT_R16G16_UNORM;
+        case CL_SIGNED_INT8:    return I965_SURFACEFORMAT_R8G8_SINT;
+        case CL_SIGNED_INT16:   return I965_SURFACEFORMAT_R16G16_SINT;
+        case CL_SIGNED_INT32:   return I965_SURFACEFORMAT_R32G32_SINT;
+        case CL_UNSIGNED_INT8:  return I965_SURFACEFORMAT_R8G8_UINT;
+        case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
+        case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT;
+        default: return INTEL_UNSUPPORTED_FORMAT;
+      };
+    case CL_RGB:
+    case CL_RGBx:
+      switch (type) {
+        case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM;
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+        default: return INTEL_UNSUPPORTED_FORMAT;
+      };
+    case CL_RGBA:
+      switch (type) {
+        case CL_HALF_FLOAT:     return I965_SURFACEFORMAT_R16G16B16A16_FLOAT;
+        case CL_FLOAT:          return I965_SURFACEFORMAT_R32G32B32A32_FLOAT;
+        case CL_SNORM_INT16:    return I965_SURFACEFORMAT_R16G16B16A16_SNORM;
+        case CL_SNORM_INT8:     return I965_SURFACEFORMAT_R8G8B8A8_SNORM;
+        case CL_UNORM_INT8:     return I965_SURFACEFORMAT_R8G8B8A8_UNORM;
+        case CL_UNORM_INT16:    return I965_SURFACEFORMAT_R16G16B16A16_UNORM;
+        case CL_SIGNED_INT8:    return I965_SURFACEFORMAT_R8G8B8A8_SINT;
+        case CL_SIGNED_INT16:   return I965_SURFACEFORMAT_R16G16B16A16_SINT;
+        case CL_SIGNED_INT32:   return I965_SURFACEFORMAT_R32G32B32A32_SINT;
+        case CL_UNSIGNED_INT8:  return I965_SURFACEFORMAT_R8G8B8A8_UINT;
+        case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16B16A16_UINT;
+        case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32B32A32_UINT;
+        default: return INTEL_UNSUPPORTED_FORMAT;
+      };
+    case CL_ARGB: return INTEL_UNSUPPORTED_FORMAT;
+    case CL_BGRA:
+      switch (type) {
+        case CL_UNORM_INT8:     return I965_SURFACEFORMAT_B8G8R8A8_UNORM;
+        default: return INTEL_UNSUPPORTED_FORMAT;
+      };
+    default: return INTEL_UNSUPPORTED_FORMAT;
+  };
+}
+
+static const uint32_t cl_image_order[] = {
+  CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB,
+  CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx
+};
+
+static const uint32_t cl_image_type[] = {
+  CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16,
+  CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
+  CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
+  CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
+  CL_HALF_FLOAT, CL_FLOAT
+};
+
+static const size_t cl_image_order_n = SIZEOF32(cl_image_order);
+static const size_t cl_image_type_n = SIZEOF32(cl_image_type);
+
+cl_int
+cl_image_get_supported_fmt(cl_context ctx,
+                           cl_mem_type image_type,
+                           cl_uint num_entries,
+                           cl_image_format *image_formats,
+                           cl_uint *num_image_formats)
+{
+  size_t i, j, n = 0;
+  /* image_formats may be NULL: the caller is then only querying the count */
+  for (i = 0; i < cl_image_order_n; ++i)
+  for (j = 0; j < cl_image_type_n; ++j) {
+    const cl_image_format fmt = {
+      .image_channel_order = cl_image_order[i],
+      .image_channel_data_type = cl_image_type[j]
+    };
+    const uint32_t intel_fmt = cl_image_get_intel_format(&fmt);
+    if (intel_fmt == INTEL_UNSUPPORTED_FORMAT)
+      continue;
+    if (n < num_entries && image_formats) image_formats[n] = fmt;
+    n++;
+  }
+  if (num_image_formats) *num_image_formats = n;
+  return CL_SUCCESS;
+}
+
diff --git a/src/cl_image.h b/src/cl_image.h
index 923bf1d..4afb4d4 100644
--- a/src/cl_image.h
+++ b/src/cl_image.h
@@ -24,8 +24,21 @@
 #include "CL/cl.h"
 #include <stdint.h>
 
+/* Returned when the OCL format is not supported */
+#define INTEL_UNSUPPORTED_FORMAT ((uint32_t) ~0x0u)
+
 /* Compute the number of bytes per pixel if the format is supported */
 extern cl_int cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp);
 
+/* Return the intel format for the given OCL format */
+extern uint32_t cl_image_get_intel_format(const cl_image_format *fmt);
+
+/* Return the list of formats supported by the API */
+extern cl_int cl_image_get_supported_fmt(cl_context context,
+                                         cl_mem_type image_type,
+                                         cl_uint num_entries,
+                                         cl_image_format *image_formats,
+                                         cl_uint *num_image_formats);
+
 #endif /* __CL_IMAGE_H__ */
 
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f2fad25..a07414d 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -136,7 +136,7 @@ cl_mem_new_image2D(cl_context ctx,
 {
   cl_int err = CL_SUCCESS;
   cl_mem mem = NULL;
-  uint32_t bpp = 0;
+  uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
   size_t sz = 0;
 
   /* Check flags consistency */
@@ -149,6 +149,13 @@ cl_mem_new_image2D(cl_context ctx,
   if (UNLIKELY((err = cl_image_byte_per_pixel(fmt, &bpp)) != CL_SUCCESS))
     goto error;
 
+  /* Only a sub-set of the formats are supported */
+  intel_fmt = cl_image_get_intel_format(fmt);
+  if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+    err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    goto error;
+  }
+
   /* See if the user parameters match */
 #define DO_IMAGE_ERROR \
   do { \
@@ -188,7 +195,9 @@ cl_mem_new_image2D(cl_context ctx,
   mem->w = w;
   mem->h = h;
   mem->fmt = *fmt;
+  mem->intel_fmt = intel_fmt;
   mem->pitch = w * bpp;
+  mem->bpp = bpp;
   mem->is_image = 1;
 
 exit:
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 9a9ceb3..420e869 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -37,6 +37,8 @@ struct _cl_mem {
   uint32_t is_image; /* Indicate if this is an image or not */
   cl_image_format fmt; /* only for images */
   size_t w,h,depth,pitch; /* only for images (depth is only for 3d images) */
+  uint32_t intel_fmt; /* format to provide in the surface state */
+  uint32_t bpp; /* number of bytes per pixel */
 };
 
 /* Create a new memory object and initialize it with possible user data */
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 944e044..29268c6 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -311,6 +311,9 @@ do { \
 /* Align a structure or a variable */
 #define ALIGNED(X) __attribute__ ((aligned (X)))
 
+/* Number of DWORDS */
+#define SIZEOF32(X) (sizeof(X) / sizeof(uint32_t))
+
 /* 32 bits atomic variable */
 typedef volatile int atomic_t;
 
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 040e4aa..eb5909a 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -256,8 +256,6 @@ static const uint32_t gpgpu_l3_config_reg2[] =
 #define L3_CNTL_REG2_ADDRESS_OFFSET (0xB020)
 #define L3_CNTL_REG3_ADDRESS_OFFSET (0xB024)
 
-#define sizeof32(X) (sizeof(X) / sizeof(uint32_t))
-
 enum INSTRUCTION_PIPELINE
 {
   PIPE_COMMON = 0x0,
@@ -310,11 +308,11 @@ enum GFX3DSTATE_PIPELINED_SUBOPCODE
 static void
 gpgpu_pipe_control(intel_gpgpu_t *state)
 {
-  BEGIN_BATCH(state->batch, sizeof32(gen6_pipe_control_t));
+  BEGIN_BATCH(state->batch, SIZEOF32(gen6_pipe_control_t));
   gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
     intel_batchbuffer_alloc_space(state->batch, 0);
   memset(pc, 0, sizeof(*pc));
-  pc->dw0.length = sizeof32(gen6_pipe_control_t) - 2;
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
   pc->dw0.instruction_subopcode = GFX3DSUBOP_3DCONTROL;
   pc->dw0.instruction_opcode = GFX3DOP_3DCONTROL;
   pc->dw0.instruction_pipeline = PIPE_3D;
-- 
2.7.4