From 685c21604c1dbf5908c6918829abddc24592354b Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 13 Sep 2013 13:52:26 +0800 Subject: [PATCH] GBE: fixed the broken 3d image support. Signed-off-by: Zhigang Gong Reviewed-by: He Junyan --- backend/src/backend/gen_context.cpp | 7 ++- backend/src/backend/gen_encoder.cpp | 5 +- backend/src/backend/gen_encoder.hpp | 1 + backend/src/llvm/llvm_gen_backend.cpp | 12 +---- backend/src/ocl_stdlib.tmpl.h | 89 +++++++++++++++-------------------- 5 files changed, 48 insertions(+), 66 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index f936cd1..070c5a8 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -1094,6 +1094,7 @@ namespace gbe const GenRegister vcoord = ra->genReg(insn.src(5)); const GenRegister wcoord = ra->genReg(insn.src(6)); uint32_t simdWidth = p->curr.execWidth; + uint32_t coord_cnt = 2; p->push(); const uint32_t nr = msgPayload.nr; // prepare mesg desc and move to a0.0. @@ -1101,9 +1102,11 @@ namespace gbe /* Prepare message payload. */ p->MOV(GenRegister::f8grf(nr , 0), ucoord); p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord); - if (insn.src(6).reg() != 0) + if (insn.src(6).reg() != 0) { p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord); - p->SAMPLE(dst, msgPayload, false, bti, sampler, simdWidth, -1, 0); + coord_cnt++; + } + p->SAMPLE(dst, msgPayload, false, bti, sampler, coord_cnt, simdWidth, -1, 0); p->pop(); } diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 35e2cee..d2d1655 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1168,15 +1168,16 @@ namespace gbe bool header_present, unsigned char bti, unsigned char sampler, + unsigned int coord_cnt, uint32_t simdWidth, uint32_t writemask, uint32_t return_format) { if (writemask == 0) return; - uint32_t msg_type = (simdWidth == 16) ? + uint32_t msg_type = (simdWidth == 16) ? GEN_SAMPLER_MESSAGE_SIMD16_SAMPLE : GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE; uint32_t response_length = (4 * (simdWidth / 8)); - uint32_t msg_length = (2 * (simdWidth / 8)); + uint32_t msg_length = (coord_cnt * (simdWidth / 8)); if (header_present) msg_length++; uint32_t simd_mode = (simdWidth == 16) ? diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index d90033e..bb88484 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -168,6 +168,7 @@ namespace gbe bool header_present, unsigned char bti, unsigned char sampler, + unsigned int coord_cnt, unsigned int simdWidth, uint32_t writemask, uint32_t return_format); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 859107f..f1d63c5 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2119,11 +2119,7 @@ namespace gbe GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; - if (it->second == GEN_OCL_READ_IMAGE10 || - it->second == GEN_OCL_READ_IMAGE11 || - it->second == GEN_OCL_READ_IMAGE12 || - it->second == GEN_OCL_READ_IMAGE13 || - it->second == GEN_OCL_READ_IMAGE14) { + if (it->second >= GEN_OCL_READ_IMAGE10 && it->second <= GEN_OCL_READ_IMAGE15) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; } else wcoord = ir::Register(0); @@ -2192,11 +2188,7 @@ namespace gbe GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI; GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI; ir::Register wcoord; - if(it->second == GEN_OCL_WRITE_IMAGE10 || - it->second == GEN_OCL_WRITE_IMAGE11 || - it->second == GEN_OCL_WRITE_IMAGE12 || - it->second == GEN_OCL_WRITE_IMAGE13 || - it->second == GEN_OCL_WRITE_IMAGE14) { + if(it->second >= GEN_OCL_WRITE_IMAGE10 && it->second <= GEN_OCL_WRITE_IMAGE15) { GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; } else wcoord = ir::Register(0); diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 4008ad4..75a5e72 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -2260,37 +2260,56 @@ int __gen_ocl_get_image_depth(uint surface_id); #define GET_IMAGE(cl_image, surface_id) \ uint surface_id = (uint)cl_image -#define DECL_READ_IMAGE(type, suffix, coord_type) \ - INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, sampler_t sampler, coord_type coord) \ +#define DECL_READ_IMAGE(image_type, type, suffix, coord_type) \ + INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, sampler_t sampler, coord_type coord) \ {\ GET_IMAGE(cl_image, surface_id);\ - return __gen_ocl_read_image ##suffix(surface_id, sampler, coord.s0, coord.s1);\ + return __gen_ocl_read_image ##suffix(EXPEND_READ_COORD(surface_id, sampler, coord));\ } -#define DECL_READ_IMAGE_NOSAMPLER(type, suffix, coord_type) \ - INLINE_OVERLOADABLE type read_image ##suffix(image2d_t cl_image, coord_type coord) \ +#define DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, coord_type) \ + INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, coord_type coord) \ {\ GET_IMAGE(cl_image, surface_id);\ - return __gen_ocl_read_image ##suffix(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord.s0, coord.s1);\ + return __gen_ocl_read_image ##suffix(EXPEND_READ_COORD(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord));\ } -#define DECL_WRITE_IMAGE(type, suffix, coord_type) \ - INLINE_OVERLOADABLE void write_image ##suffix(image2d_t cl_image, coord_type coord, type color)\ +#define DECL_WRITE_IMAGE(image_type, type, suffix, coord_type) \ + INLINE_OVERLOADABLE void write_image ##suffix(image_type cl_image, coord_type coord, type color)\ {\ GET_IMAGE(cl_image, surface_id);\ - __gen_ocl_write_image ##suffix(surface_id, coord.s0, coord.s1, color);\ + __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, color));\ } -#define DECL_IMAGE(type, suffix) \ - DECL_READ_IMAGE(type, suffix, int2) \ - DECL_READ_IMAGE(type, suffix, float2) \ - DECL_READ_IMAGE_NOSAMPLER(type, suffix, int2) \ - DECL_WRITE_IMAGE(type, suffix, int2) \ - DECL_WRITE_IMAGE(type, suffix, float2) +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1 +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, color -DECL_IMAGE(int4, i) -DECL_IMAGE(uint4, ui) -DECL_IMAGE(float4, f) +#define DECL_IMAGE(image_type, type, suffix, n) \ + DECL_READ_IMAGE(image_type, type, suffix, int ##n) \ + DECL_READ_IMAGE(image_type, type, suffix, float ##n) \ + DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n) \ + DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) \ + DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) + +DECL_IMAGE(image2d_t, int4, i, 2) +DECL_IMAGE(image2d_t, uint4, ui, 2) +DECL_IMAGE(image2d_t, float4, f, 2) + +#undef EXPEND_READ_COORD +#undef EXPEND_WRITE_COORD + +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1, coord.s2 +#define EXPEND_WRITE_COORD(id, coord, color) id, coord.s0, coord.s1, coord.s2, color + +DECL_IMAGE(image3d_t, int4, i, 4) +DECL_IMAGE(image3d_t, uint4, ui, 4) +DECL_IMAGE(image3d_t, float4, f, 4) + +DECL_IMAGE(image3d_t, int4, i, 3) +DECL_IMAGE(image3d_t, uint4, ui, 3) +DECL_IMAGE(image3d_t, float4, f, 3) +#undef EXPEND_READ_COORD +#undef EXPEND_WRITE_COORD #undef DECL_IMAGE #undef DECL_READ_IMAGE @@ -2353,40 +2372,6 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image) { return __gen_ocl_get_image_array_size(image); } #endif -#define DECL_READ_IMAGE(type, suffix, coord_type) \ - INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, sampler_t sampler, coord_type coord) \ - {\ - GET_IMAGE(cl_image, surface_id);\ - return __gen_ocl_read_image ## suffix(surface_id, (uint)sampler, coord.s0, coord.s1, coord.s2);\ - } - -#define DECL_READ_IMAGE_NOSAMPLER(type, suffix, coord_type) \ - INLINE_OVERLOADABLE type read_image ## suffix(image3d_t cl_image, coord_type coord) \ - {\ - GET_IMAGE(cl_image, surface_id);\ - return __gen_ocl_read_image ## suffix(surface_id, CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST, coord.s0, coord.s1, coord.s2);\ - } - -#define DECL_WRITE_IMAGE(type, suffix, coord_type) \ - INLINE_OVERLOADABLE void write_image ## suffix(image3d_t cl_image, coord_type coord, type color)\ - {\ - GET_IMAGE(cl_image, surface_id);\ - __gen_ocl_write_image ## suffix(surface_id, coord.s0, coord.s1, coord.s2, color);\ - } - -#define DECL_IMAGE(type, suffix) \ - DECL_READ_IMAGE(type, suffix, int4) \ - DECL_READ_IMAGE(type, suffix, float4) \ - DECL_READ_IMAGE_NOSAMPLER(type, suffix, int4) \ - DECL_WRITE_IMAGE(type, suffix, int4) \ - DECL_WRITE_IMAGE(type, suffix, float4) - -DECL_IMAGE(int4, i) -DECL_IMAGE(uint4, ui) -DECL_IMAGE(float4, f) - - - #pragma OPENCL EXTENSION cl_khr_fp64 : disable #undef DECL_IMAGE -- 2.7.4