r600g: Implement gpu_shader5 textureGather
author Glenn Kennard <glenn.kennard@gmail.com>
Sun, 20 Jul 2014 13:59:16 +0000 (15:59 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Fri, 1 Aug 2014 14:19:47 +0000 (16:19 +0200)
Adds 0-3 textureGather component selection and non-constant offsets

Caveat: 0 and 1 texture swizzles only work if the textureGather component
select is 3 or a component that does not exist in the sampler texture
format. This is a hardware limitation; any other component select value
returns 128/255=0.501961 for both 0 and 1.

Passes all textureGather piglit tests on radeon 6670, except for those
using 0/1 texture swizzles due to aforementioned reason.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
docs/GL3.txt
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_shader.c

index 59b20dc..e241257 100644 (file)
@@ -106,7 +106,7 @@ GL 4.0:
   - Implicit signed -> unsigned conversions            DONE
   - Fused multiply-add                                 DONE (i965, nvc0)
   - Packing/bitfield/conversion functions              DONE (i965, nvc0, r600)
-  - Enhanced textureGather                             DONE (i965, nvc0, radeonsi)
+  - Enhanced textureGather                             DONE (i965, nvc0, r600, radeonsi)
   - Geometry shader instancing                         DONE (i965, nvc0)
   - Geometry shader multiple streams                   DONE (i965, nvc0)
   - Enhanced per-sample shading                        DONE (i965, r600)
@@ -118,7 +118,7 @@ GL 4.0:
   GL_ARB_tessellation_shader                           started (Fabian)
   GL_ARB_texture_buffer_object_rgb32                   DONE (i965, nvc0, r600, radeonsi, softpipe)
   GL_ARB_texture_cube_map_array                        DONE (i965, nv50, nvc0, r600, radeonsi, softpipe)
-  GL_ARB_texture_gather                                DONE (i965, nv50, nvc0, radeonsi, r600)
+  GL_ARB_texture_gather                                DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_texture_query_lod                             DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_transform_feedback2                           DONE (i965, nv50, nvc0, r600, radeonsi)
   GL_ARB_transform_feedback3                           DONE (i965, nv50, nvc0, r600, radeonsi)
index 39edbcf..a08e70e 100644 (file)
@@ -303,9 +303,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
        case PIPE_CAP_CUBE_MAP_ARRAY:
        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
-       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+       case PIPE_CAP_TEXTURE_GATHER_SM5:
        case PIPE_CAP_TEXTURE_QUERY_LOD:
                return family >= CHIP_CEDAR ? 1 : 0;
+       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+               return family >= CHIP_CEDAR ? 4 : 0;
 
        /* Unsupported features. */
        case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -314,7 +316,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
        case PIPE_CAP_VERTEX_COLOR_CLAMPED:
        case PIPE_CAP_USER_VERTEX_BUFFERS:
-       case PIPE_CAP_TEXTURE_GATHER_SM5:
        case PIPE_CAP_SAMPLE_SHADING:
        case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
        case PIPE_CAP_DRAW_INDIRECT:
index 00b2f4a..e3407d5 100644 (file)
@@ -5251,6 +5251,35 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        }
 
        opcode = ctx->inst_info->op;
+       if (opcode == FETCH_OP_GATHER4 &&
+               inst->TexOffsets[0].File != TGSI_FILE_NULL &&
+               inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
+               opcode = FETCH_OP_GATHER4_O;
+
+               /* GATHER4_O/GATHER4_C_O use offset values loaded by
+                  SET_TEXTURE_OFFSETS instruction. The immediate offset values
+                  encoded in the instruction are ignored. */
+               memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+               tex.op = FETCH_OP_SET_TEXTURE_OFFSETS;
+               tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+               tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+
+               tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
+               tex.src_sel_x = inst->TexOffsets[0].SwizzleX;
+               tex.src_sel_y = inst->TexOffsets[0].SwizzleY;
+               tex.src_sel_z = inst->TexOffsets[0].SwizzleZ;
+               tex.src_sel_w = 4;
+
+               tex.dst_sel_x = 7;
+               tex.dst_sel_y = 7;
+               tex.dst_sel_z = 7;
+               tex.dst_sel_w = 7;
+
+               r = r600_bytecode_add_tex(ctx->bc, &tex);
+               if (r)
+                       return r;
+       }
+
        if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
            inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
            inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
@@ -5273,10 +5302,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                        break;
                /* Texture gather variants */
                case FETCH_OP_GATHER4:
-                       tex.op = FETCH_OP_GATHER4_C;
+                       opcode = FETCH_OP_GATHER4_C;
                        break;
                case FETCH_OP_GATHER4_O:
-                       tex.op = FETCH_OP_GATHER4_C_O;
+                       opcode = FETCH_OP_GATHER4_C_O;
                        break;
                }
        }
@@ -5352,7 +5381,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        tex.offset_x = offset_x;
        tex.offset_y = offset_y;
        if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
-               inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) {
+               (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
+                inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
                tex.offset_z = 0;
        }
        else {