From 1e4ed293c091488c4f894cf19ebbd8825327f788 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Tue, 18 Feb 2014 18:32:33 +0800 Subject: [PATCH] GBE: code cleanup for read_image/write_image. Remove some useless instructions and make the read/write_image more readable. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- backend/src/llvm/llvm_gen_backend.cpp | 133 +++++++++-------------------- backend/src/llvm/llvm_gen_ocl_function.hxx | 42 ++++----- backend/src/llvm/llvm_scalarize.cpp | 36 +++----- backend/src/ocl_stdlib.tmpl.h | 12 --- 4 files changed, 69 insertions(+), 154 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index fce328e..77aee20 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2093,31 +2093,19 @@ namespace gbe case GEN_OCL_LGBARRIER: ctx.getFunction().setUseSLM(true); break; - case GEN_OCL_WRITE_IMAGE0: - case GEN_OCL_WRITE_IMAGE1: - case GEN_OCL_WRITE_IMAGE2: - case GEN_OCL_WRITE_IMAGE3: - case GEN_OCL_WRITE_IMAGE4: - case GEN_OCL_WRITE_IMAGE5: - case GEN_OCL_WRITE_IMAGE10: - case GEN_OCL_WRITE_IMAGE11: - case GEN_OCL_WRITE_IMAGE12: - case GEN_OCL_WRITE_IMAGE13: - case GEN_OCL_WRITE_IMAGE14: - case GEN_OCL_WRITE_IMAGE15: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: + case GEN_OCL_WRITE_IMAGE_I_3D: + case GEN_OCL_WRITE_IMAGE_UI_3D: + case GEN_OCL_WRITE_IMAGE_F_3D: break; - case GEN_OCL_READ_IMAGE0: - case GEN_OCL_READ_IMAGE1: - case GEN_OCL_READ_IMAGE2: - case GEN_OCL_READ_IMAGE3: - case GEN_OCL_READ_IMAGE4: - case GEN_OCL_READ_IMAGE5: - case GEN_OCL_READ_IMAGE10: - case GEN_OCL_READ_IMAGE11: - case GEN_OCL_READ_IMAGE12: - case GEN_OCL_READ_IMAGE13: - case GEN_OCL_READ_IMAGE14: - case GEN_OCL_READ_IMAGE15: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: + case GEN_OCL_READ_IMAGE_I_3D: + case GEN_OCL_READ_IMAGE_UI_3D: + case GEN_OCL_READ_IMAGE_F_3D: { // dst is a 4 elements vector. We allocate all 4 registers here. uint32_t elemNum; @@ -2385,18 +2373,12 @@ namespace gbe ctx.GET_SAMPLER_INFO(reg, ir::ocl::samplerinfo, index); break; } - case GEN_OCL_READ_IMAGE0: - case GEN_OCL_READ_IMAGE1: - case GEN_OCL_READ_IMAGE2: - case GEN_OCL_READ_IMAGE3: - case GEN_OCL_READ_IMAGE4: - case GEN_OCL_READ_IMAGE5: - case GEN_OCL_READ_IMAGE10: - case GEN_OCL_READ_IMAGE11: - case GEN_OCL_READ_IMAGE12: - case GEN_OCL_READ_IMAGE13: - case GEN_OCL_READ_IMAGE14: - case GEN_OCL_READ_IMAGE15: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: + case GEN_OCL_READ_IMAGE_I_3D: + case GEN_OCL_READ_IMAGE_UI_3D: + case GEN_OCL_READ_IMAGE_F_3D: { GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI; const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg); @@ -2408,7 +2390,7 @@ namespace gbe GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; bool is3D = false; - if (it->second >= GEN_OCL_READ_IMAGE10 && it->second <= GEN_OCL_READ_IMAGE15) { + if (it->second >= GEN_OCL_READ_IMAGE_I_3D) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; is3D = true; } else @@ -2434,52 +2416,33 @@ namespace gbe const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3); - ir::Type srcType = ir::TYPE_S32, dstType = ir::TYPE_U32; + ir::Type dstType = ir::TYPE_U32; switch(it->second) { - case GEN_OCL_READ_IMAGE0: - case GEN_OCL_READ_IMAGE2: - case GEN_OCL_READ_IMAGE10: - case GEN_OCL_READ_IMAGE12: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_I_3D: + case GEN_OCL_READ_IMAGE_UI_3D: dstType = ir::TYPE_U32; - srcType = ir::TYPE_S32; break; - case GEN_OCL_READ_IMAGE1: - case GEN_OCL_READ_IMAGE3: - case GEN_OCL_READ_IMAGE11: - case GEN_OCL_READ_IMAGE13: - dstType = ir::TYPE_U32; - srcType = ir::TYPE_FLOAT; - break; - case GEN_OCL_READ_IMAGE4: - case GEN_OCL_READ_IMAGE14: + case GEN_OCL_READ_IMAGE_F: + case GEN_OCL_READ_IMAGE_F_3D: dstType = ir::TYPE_FLOAT; - srcType = ir::TYPE_S32; - break; - case GEN_OCL_READ_IMAGE5: - case GEN_OCL_READ_IMAGE15: - srcType = dstType = ir::TYPE_FLOAT; break; default: GBE_ASSERT(0); // never been here. } ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, - srcType == ir::TYPE_FLOAT, sampler, samplerOffset, is3D); + true, sampler, samplerOffset, is3D); break; } - case GEN_OCL_WRITE_IMAGE0: - case GEN_OCL_WRITE_IMAGE1: - case GEN_OCL_WRITE_IMAGE2: - case GEN_OCL_WRITE_IMAGE3: - case GEN_OCL_WRITE_IMAGE4: - case GEN_OCL_WRITE_IMAGE5: - case GEN_OCL_WRITE_IMAGE10: - case GEN_OCL_WRITE_IMAGE11: - case GEN_OCL_WRITE_IMAGE12: - case GEN_OCL_WRITE_IMAGE13: - case GEN_OCL_WRITE_IMAGE14: - case GEN_OCL_WRITE_IMAGE15: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: + case GEN_OCL_WRITE_IMAGE_I_3D: + case GEN_OCL_WRITE_IMAGE_UI_3D: + case GEN_OCL_WRITE_IMAGE_F_3D: { GBE_ASSERT(AI != AE); const ir::Register surfaceReg = this->getRegister(*AI); ++AI; const uint8_t surfaceID = ctx.getFunction().getImageSet()->getIdx(surfaceReg); @@ -2487,7 +2450,7 @@ namespace gbe GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; bool is3D = false; - if(it->second >= GEN_OCL_WRITE_IMAGE10 && it->second <= GEN_OCL_WRITE_IMAGE15) { + if(it->second >= GEN_OCL_WRITE_IMAGE_I_3D) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; is3D = true; } else @@ -2506,36 +2469,24 @@ namespace gbe } const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7); - ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32; + ir::Type srcType = ir::TYPE_U32; switch(it->second) { - case GEN_OCL_WRITE_IMAGE0: - case GEN_OCL_WRITE_IMAGE2: - case GEN_OCL_WRITE_IMAGE10: - case GEN_OCL_WRITE_IMAGE12: - srcType = coordType = ir::TYPE_U32; - break; - case GEN_OCL_WRITE_IMAGE1: - case GEN_OCL_WRITE_IMAGE3: - case GEN_OCL_WRITE_IMAGE11: - case GEN_OCL_WRITE_IMAGE13: - coordType = ir::TYPE_FLOAT; + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_I_3D: + case GEN_OCL_WRITE_IMAGE_UI_3D: srcType = ir::TYPE_U32; break; - case GEN_OCL_WRITE_IMAGE4: - case GEN_OCL_WRITE_IMAGE14: + case GEN_OCL_WRITE_IMAGE_F: + case GEN_OCL_WRITE_IMAGE_F_3D: srcType = ir::TYPE_FLOAT; - coordType = ir::TYPE_U32; - break; - case GEN_OCL_WRITE_IMAGE5: - case GEN_OCL_WRITE_IMAGE15: - srcType = coordType = ir::TYPE_FLOAT; break; default: GBE_ASSERT(0); // never been here. } - ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, coordType, is3D); + ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32, is3D); break; } case GEN_OCL_MUL_HI_INT: diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index de2890c..750344a 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -44,34 +44,22 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8) DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) // To read_image functions. -DECL_LLVM_GEN_FUNCTION(READ_IMAGE0, _Z21__gen_ocl_read_imageijtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE1, _Z21__gen_ocl_read_imageijtffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE2, _Z22__gen_ocl_read_imageuijtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE3, _Z22__gen_ocl_read_imageuijtffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE4, _Z21__gen_ocl_read_imagefjtiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE5, _Z21__gen_ocl_read_imagefjtffj) - -DECL_LLVM_GEN_FUNCTION(READ_IMAGE10, _Z21__gen_ocl_read_imageijtiiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE11, _Z21__gen_ocl_read_imageijtfffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE12, _Z22__gen_ocl_read_imageuijtiiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE13, _Z22__gen_ocl_read_imageuijtfffj) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE14, _Z21__gen_ocl_read_imagefjtiiij) -DECL_LLVM_GEN_FUNCTION(READ_IMAGE15, _Z21__gen_ocl_read_imagefjtfffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, _Z21__gen_ocl_read_imageijtffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, _Z22__gen_ocl_read_imageuijtffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, _Z21__gen_ocl_read_imagefjtffj) + +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj) +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj) // To write_image functions. -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE0, _Z22__gen_ocl_write_imageijiiDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE1, _Z22__gen_ocl_write_imageijffDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE4, _Z22__gen_ocl_write_imagefjiiDv4_f) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE5, _Z22__gen_ocl_write_imagefjffDv4_f) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE2, _Z23__gen_ocl_write_imageuijiiDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE3, _Z23__gen_ocl_write_imageuijffDv4_j) - -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE10, _Z22__gen_ocl_write_imageijiiiDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE11, _Z22__gen_ocl_write_imageijfffDv4_i) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE12, _Z23__gen_ocl_write_imageuijiiiDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE13, _Z23__gen_ocl_write_imageuijfffDv4_j) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE14, _Z22__gen_ocl_write_imagefjiiiDv4_f) -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE15, _Z22__gen_ocl_write_imagefjfffDv4_f) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, _Z23__gen_ocl_write_imageuijiiDv4_j) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, _Z22__gen_ocl_write_imagefjiiDv4_f) + +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, _Z23__gen_ocl_write_imageuijiiiDv4_j) +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_3D, _Z22__gen_ocl_write_imagefjiiiDv4_f) // To get image info function DECL_LLVM_GEN_FUNCTION(GET_IMAGE_WIDTH, __gen_ocl_get_image_width) @@ -181,4 +169,4 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati) DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) -DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) \ No newline at end of file +DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp index c1790f7..7095473 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -643,18 +643,12 @@ namespace gbe { switch (it->second) { default: break; - case GEN_OCL_READ_IMAGE0: - case GEN_OCL_READ_IMAGE1: - case GEN_OCL_READ_IMAGE2: - case GEN_OCL_READ_IMAGE3: - case GEN_OCL_READ_IMAGE4: - case GEN_OCL_READ_IMAGE5: - case GEN_OCL_READ_IMAGE10: - case GEN_OCL_READ_IMAGE11: - case GEN_OCL_READ_IMAGE12: - case GEN_OCL_READ_IMAGE13: - case GEN_OCL_READ_IMAGE14: - case GEN_OCL_READ_IMAGE15: + case GEN_OCL_READ_IMAGE_I: + case GEN_OCL_READ_IMAGE_UI: + case GEN_OCL_READ_IMAGE_F: + case GEN_OCL_READ_IMAGE_I_3D: + case GEN_OCL_READ_IMAGE_UI_3D: + case GEN_OCL_READ_IMAGE_F_3D: case GEN_OCL_GET_IMAGE_WIDTH: case GEN_OCL_GET_IMAGE_HEIGHT: { @@ -662,19 +656,13 @@ namespace gbe { extractFromVector(call); break; } - case GEN_OCL_WRITE_IMAGE10: - case GEN_OCL_WRITE_IMAGE11: - case GEN_OCL_WRITE_IMAGE12: - case GEN_OCL_WRITE_IMAGE13: - case GEN_OCL_WRITE_IMAGE14: - case GEN_OCL_WRITE_IMAGE15: + case GEN_OCL_WRITE_IMAGE_I_3D: + case GEN_OCL_WRITE_IMAGE_UI_3D: + case GEN_OCL_WRITE_IMAGE_F_3D: CI++; - case GEN_OCL_WRITE_IMAGE0: - case GEN_OCL_WRITE_IMAGE1: - case GEN_OCL_WRITE_IMAGE2: - case GEN_OCL_WRITE_IMAGE3: - case GEN_OCL_WRITE_IMAGE4: - case GEN_OCL_WRITE_IMAGE5: + case GEN_OCL_WRITE_IMAGE_I: + case GEN_OCL_WRITE_IMAGE_UI: + case GEN_OCL_WRITE_IMAGE_F: { *CI = InsertToVector(call, *CI); break; diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 9dec3a5..307f562 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -4487,33 +4487,21 @@ int __gen_ocl_force_simd16(void); // Image access functions ///////////////////////////////////////////////////////////////////////////// -//OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -//OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -//OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, uint sampler_offset); -//OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -//OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); -//OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, int u, int v, int w, uint sampler_offset); OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t sampler, float u, float v, float w, uint sampler_offset); OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 color); -//OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, int4 color); OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, uint4 color); -//OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, uint4 color); OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, float4 color); -//OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float4 color); OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int w, int4 color); -//OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, float u, float v, float w, int4 color); OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int w, uint4 color); -//OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, float u, float v, float w, uint4 color); OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int w, float4 color); -//OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, float u, float v, float w, float4 color); int __gen_ocl_get_image_width(uint surface_id); int __gen_ocl_get_image_height(uint surface_id); int __gen_ocl_get_image_channel_data_type(uint surface_id); -- 2.7.4