From 3a9278b92cb5595d1dae604c409abe8f1a9be5b3 Mon Sep 17 00:00:00 2001 From: Glenn Kennard Date: Sun, 20 Jul 2014 15:59:16 +0200 Subject: [PATCH] r600g: Implement gpu_shader5 textureGather MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Adds 0-3 textureGather component selection and non-constant offsets. Caveat: 0 and 1 texture swizzles only work if textureGather component select is 3 or a component that does not exist in the sampler texture format. This is a hardware limitation; any other value returns 128/255=0.501961 for both 0 and 1. Passes all textureGather piglit tests on radeon 6670, except for those using 0/1 texture swizzles due to aforementioned reason. Signed-off-by: Glenn Kennard Signed-off-by: Marek Olšák --- docs/GL3.txt | 4 ++-- src/gallium/drivers/r600/r600_pipe.c | 5 +++-- src/gallium/drivers/r600/r600_shader.c | 36 +++++++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 59b20dc..e241257 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -106,7 +106,7 @@ GL 4.0: - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE (i965, nvc0) - Packing/bitfield/conversion functions DONE (i965, nvc0, r600) - - Enhanced textureGather DONE (i965, nvc0, radeonsi) + - Enhanced textureGather DONE (i965, nvc0, r600, radeonsi) - Geometry shader instancing DONE (i965, nvc0) - Geometry shader multiple streams DONE (i965, nvc0) - Enhanced per-sample shading DONE (i965, r600) @@ -118,7 +118,7 @@ GL 4.0: GL_ARB_tessellation_shader started (Fabian) GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, softpipe) GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, softpipe) - GL_ARB_texture_gather DONE (i965, nv50, nvc0, radeonsi, r600) + GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, 
r600, radeonsi) GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 39edbcf..a08e70e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -303,9 +303,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: case PIPE_CAP_CUBE_MAP_ARRAY: case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: - case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_TEXTURE_QUERY_LOD: return family >= CHIP_CEDAR ? 1 : 0; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + return family >= CHIP_CEDAR ? 4 : 0; /* Unsupported features. */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -314,7 +316,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_TEXTURE_GATHER_SM5: case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: case PIPE_CAP_DRAW_INDIRECT: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 00b2f4a..e3407d5 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5251,6 +5251,35 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } opcode = ctx->inst_info->op; + if (opcode == FETCH_OP_GATHER4 && + inst->TexOffsets[0].File != TGSI_FILE_NULL && + inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { + opcode = FETCH_OP_GATHER4_O; + + /* GATHER4_O/GATHER4_C_O use offset values loaded by + SET_TEXTURE_OFFSETS instruction. The immediate offset values + encoded in the instruction are ignored. 
*/ + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + + tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; + tex.src_sel_x = inst->TexOffsets[0].SwizzleX; + tex.src_sel_y = inst->TexOffsets[0].SwizzleY; + tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; + tex.src_sel_w = 4; + + tex.dst_sel_x = 7; + tex.dst_sel_y = 7; + tex.dst_sel_z = 7; + tex.dst_sel_w = 7; + + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + } + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || @@ -5273,10 +5302,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) break; /* Texture gather variants */ case FETCH_OP_GATHER4: - tex.op = FETCH_OP_GATHER4_C; + opcode = FETCH_OP_GATHER4_C; break; case FETCH_OP_GATHER4_O: - tex.op = FETCH_OP_GATHER4_C_O; + opcode = FETCH_OP_GATHER4_C_O; break; } } @@ -5352,7 +5381,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.offset_x = offset_x; tex.offset_y = offset_y; if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && - inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) { + (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { tex.offset_z = 0; } else { -- 2.7.4