radeonsi: create load_const_buffer_desc_fast_path() helper
author Timothy Arceri <tarceri@itsqueeze.com>
Mon, 26 Mar 2018 23:26:16 +0000 (10:26 +1100)
committer Timothy Arceri <tarceri@itsqueeze.com>
Mon, 2 Apr 2018 04:56:00 +0000 (14:56 +1000)
This will be shared by the TGSI and NIR backends. For simplicity
we leave the workaround for SI with LLVM 5.0 and lower only in the
TGSI backend.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeonsi/si_shader.c

index 00ebbb9..1661b54 100644 (file)
@@ -2319,6 +2319,49 @@ void si_tgsi_declare_compute_memory(struct si_shader_context *ctx,
        si_declare_compute_memory(ctx);
 }
 
+static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
+{
+       LLVMValueRef ptr =
+               LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
+       struct si_shader_selector *sel = ctx->shader->selector;
+
+       /* Do the bounds checking with a descriptor, because
+        * doing computation and manual bounds checking of 64-bit
+        * addresses generates horrible VALU code with very high
+        * VGPR usage and very low SIMD occupancy.
+        */
+       ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
+
+       LLVMValueRef desc0, desc1;
+       if (HAVE_32BIT_POINTERS) {
+               desc0 = ptr;
+               desc1 = LLVMConstInt(ctx->i32,
+                                    S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
+       } else {
+               ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
+               desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
+               desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
+               /* Mask out all bits except BASE_ADDRESS_HI. */
+               desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
+                                    LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
+       }
+
+       LLVMValueRef desc_elems[] = {
+               desc0,
+               desc1,
+               LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
+               LLVMConstInt(ctx->i32,
+                       S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                       S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                       S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                       S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                       S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                       S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
+       };
+
+       return ac_build_gather_values(&ctx->ac, desc_elems, 4);
+}
+
 static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
 {
        LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
@@ -2397,8 +2440,6 @@ static LLVMValueRef fetch_constant(
        /* Fast path when user data SGPRs point to constant buffer 0 directly. */
        if (sel->info.const_buffers_declared == 1 &&
            sel->info.shader_buffers_declared == 0) {
-               LLVMValueRef ptr =
-                       LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
 
                /* This enables use of s_load_dword and flat_load_dword for const buffer 0
                 * loads, and up to x4 load opcode merging. However, it leads to horrible
@@ -2413,48 +2454,17 @@ static LLVMValueRef fetch_constant(
                 * s_buffer_load_dword (that we have to prevent) is when we use use
                 * a literal offset where we don't need bounds checking.
                 */
-               if (ctx->screen->info.chip_class == SI &&
-                    HAVE_LLVM < 0x0600 &&
-                    !reg->Register.Indirect) {
+               if (ctx->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
+                   !reg->Register.Indirect) {
+                       LLVMValueRef ptr =
+                               LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers);
+
                        addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
                        LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
                        return bitcast(bld_base, type, result);
                }
 
-               /* Do the bounds checking with a descriptor, because
-                * doing computation and manual bounds checking of 64-bit
-                * addresses generates horrible VALU code with very high
-                * VGPR usage and very low SIMD occupancy.
-                */
-               ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
-
-               LLVMValueRef desc0, desc1;
-               if (HAVE_32BIT_POINTERS) {
-                       desc0 = ptr;
-                       desc1 = LLVMConstInt(ctx->i32,
-                                            S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
-               } else {
-                       ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
-                       desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
-                       desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
-                       /* Mask out all bits except BASE_ADDRESS_HI. */
-                       desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
-                                            LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
-               }
-
-               LLVMValueRef desc_elems[] = {
-                       desc0,
-                       desc1,
-                       LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
-                       LLVMConstInt(ctx->i32,
-                               S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
-                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-                               S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
-                               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
-               };
-               LLVMValueRef desc = ac_build_gather_values(&ctx->ac, desc_elems, 4);
+               LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx);
                LLVMValueRef result = buffer_load_const(ctx, desc, addr);
                return bitcast(bld_base, type, result);
        }