ac: rework ac_build_waitcnt for gfx10
author Marek Olšák <marek.olsak@amd.com>
Mon, 24 Jun 2019 20:13:24 +0000 (16:13 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:13 +0000 (15:51 -0400)
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/common/ac_llvm_build.c
src/amd/common/ac_llvm_build.h
src/amd/common/ac_nir_to_llvm.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c

index eb71a69..4c48fe1 100644 (file)
@@ -2882,13 +2882,49 @@ LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
                             LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
 }
 
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags) /* wait_flags: OR of AC_WAIT_* bits; emits an llvm.amdgcn.s.waitcnt call built from them */
 {
+       if (!wait_flags) /* nothing requested: emit no instruction at all */
+               return;
+
+       unsigned lgkmcnt = 63; /* counters start at these defaults; each requested AC_WAIT_* flag drops one to 0 */
+       unsigned expcnt = 7;
+       unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15; /* 6-bit value on GFX9+, 4-bit value on older chips */
+       unsigned vscnt = 63; /* only read by the disabled vscnt branch at the end of this function */
+
+       if (wait_flags & AC_WAIT_LGKM)
+               lgkmcnt = 0;
+       if (wait_flags & AC_WAIT_EXP)
+               expcnt = 0;
+       if (wait_flags & AC_WAIT_VLOAD)
+               vmcnt = 0;
+
+       if (wait_flags & AC_WAIT_VSTORE) {
+               if (ctx->chip_class >= GFX10)
+                       vscnt = 0; /* GFX10+: a store wait goes to the separate vscnt counter */
+               else
+                       vmcnt = 0; /* before GFX10: a store wait is folded into vmcnt */
+       }
+
+       unsigned simm16 = (lgkmcnt << 8) | /* lgkmcnt starts at bit 8 */
+                         (expcnt << 4) | /* expcnt starts at bit 4 */
+                         (vmcnt & 0xf) | /* low 4 bits of vmcnt in bits [3:0] */
+                         ((vmcnt >> 4) << 14); /* remaining vmcnt bits in bits [15:14] */
+
         LLVMValueRef args[1] = {
                 LLVMConstInt(ctx->i32, simm16, false),
         };
         ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
                            ctx->voidt, args, 1, 0);
+
+       /* TODO: add llvm.amdgcn.s.waitcnt.vscnt into LLVM: */
+       if (0 && ctx->chip_class >= GFX10 && vscnt == 0) { /* "0 &&" keeps this dead: NOTE(review) on GFX10 an AC_WAIT_VSTORE request currently emits no vscnt wait */
+               LLVMValueRef args[1] = { /* shadows the outer args[] used for s.waitcnt */
+                       LLVMConstInt(ctx->i32, vscnt, false),
+               };
+               ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt.vscnt",
+                                  ctx->voidt, args, 1, 0);
+       }
 }
 
 LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
index 4917315..eba01e5 100644 (file)
@@ -43,11 +43,10 @@ enum {
        AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
 };
 
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT   0xcf7f
-#define LGKM_CNT       0xc07f
-#define EXP_CNT                0xcf0f
-#define VM_CNT         0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM   (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_EXP    (1 << 1) /* exports */
+#define AC_WAIT_VLOAD  (1 << 2) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 3) /* VMEM store instructions */
 
 struct ac_llvm_flow;
 
@@ -575,7 +574,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
                           LLVMValueRef s1, LLVMValueRef s2);
 
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
                           unsigned bitsize);
index 73941ba..e5a9389 100644 (file)
@@ -2741,26 +2741,26 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 static void emit_membar(struct ac_llvm_context *ac,
                         const nir_intrinsic_instr *instr)
 {
-       unsigned waitcnt = NOOP_WAITCNT;
+       unsigned wait_flags = 0; /* AC_WAIT_* bits chosen from the barrier intrinsic; stays 0 for unhandled intrinsics */
 
        switch (instr->intrinsic) {
        case nir_intrinsic_memory_barrier:
        case nir_intrinsic_group_memory_barrier:
-               waitcnt &= VM_CNT & LGKM_CNT;
+               wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; /* wait on LGKM plus VMEM loads and stores */
                break;
        case nir_intrinsic_memory_barrier_atomic_counter:
        case nir_intrinsic_memory_barrier_buffer:
        case nir_intrinsic_memory_barrier_image:
-               waitcnt &= VM_CNT;
+               wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE; /* VMEM loads and stores only */
                break;
        case nir_intrinsic_memory_barrier_shared:
-               waitcnt &= LGKM_CNT;
+               wait_flags = AC_WAIT_LGKM; /* LGKM only */
                break;
        default:
                break;
        }
-       if (waitcnt != NOOP_WAITCNT)
-               ac_build_waitcnt(ac, waitcnt);
+
+       ac_build_waitcnt(ac, wait_flags); /* no caller-side guard: ac_build_waitcnt returns early when wait_flags is 0 */
 }
 
 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
@@ -2770,7 +2770,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
         * always fits into a single wave.
         */
        if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
-               ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
+               ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
                return;
        }
        ac_build_s_barrier(ac);
index 9a4a416..6bdddb1 100644 (file)
@@ -3908,21 +3908,20 @@ static void membar_emit(
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
        unsigned flags = LLVMConstIntGetZExtValue(src0);
-       unsigned waitcnt = NOOP_WAITCNT;
+       unsigned wait_flags = 0;
 
        if (flags & TGSI_MEMBAR_THREAD_GROUP)
-               waitcnt &= VM_CNT & LGKM_CNT;
+               wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
 
        if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
                     TGSI_MEMBAR_SHADER_BUFFER |
                     TGSI_MEMBAR_SHADER_IMAGE))
-               waitcnt &= VM_CNT;
+               wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
 
        if (flags & TGSI_MEMBAR_SHARED)
-               waitcnt &= LGKM_CNT;
+               wait_flags |= AC_WAIT_LGKM;
 
-       if (waitcnt != NOOP_WAITCNT)
-               ac_build_waitcnt(&ctx->ac, waitcnt);
+       ac_build_waitcnt(&ctx->ac, wait_flags);
 }
 
 static void clock_emit(
@@ -4372,7 +4371,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
         */
        if (ctx->screen->info.chip_class == GFX6 &&
            ctx->type == PIPE_SHADER_TESS_CTRL) {
-               ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
+               ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
                return;
        }
 
index 455af80..8dcc1a5 100644 (file)
@@ -520,7 +520,7 @@ static void load_emit(
        }
 
        if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-               ac_build_waitcnt(&ctx->ac, VM_CNT);
+               ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
 
        can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
                          is_oneway_access_only(inst, info,
@@ -709,7 +709,7 @@ static void store_emit(
        }
 
        if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-               ac_build_waitcnt(&ctx->ac, VM_CNT);
+               ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
 
        bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER;
        args.cache_policy = get_cache_policy(ctx, inst,