aco: Stylistic changes to emit_gfx10_wave64_bpermute.
authorTimur Kristóf <timur.kristof@gmail.com>
Tue, 13 Dec 2022 08:43:39 +0000 (09:43 +0100)
committerMarge Bot <emma+marge@anholt.net>
Wed, 14 Dec 2022 13:54:04 +0000 (13:54 +0000)
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20293>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_lower_to_hw_instr.cpp

index 5258236..d818cab 100644 (file)
@@ -219,12 +219,17 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data)
       input_data.setLateKill(true);
       same_half.setLateKill(true);
 
-      /* We need one pair of shared VGPRs:
-       * Note, that these have twice the allocation granularity of normal VGPRs */
-      ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
+      if (ctx->options->gfx_level <= GFX10_3) {
+         /* We need one pair of shared VGPRs:
+          * Note, that these have twice the allocation granularity of normal VGPRs
+          */
+         ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
 
-      return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc),
-                        index_x4, input_data, same_half);
+         return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2),
+                           bld.def(s1, scc), index_x4, input_data, same_half);
+      } else {
+         unreachable("emit_bpermute does not yet support GFX11+");
+      }
    } else {
       /* GFX8-9 or GFX10 wave32: bpermute works normally */
       Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index);
index 3d7f1f3..b4fadd8 100644 (file)
@@ -848,7 +848,7 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
     * manually swap the data between the two halves using two shared VGPRs.
     */
 
-   assert(program->gfx_level >= GFX10);
+   assert(program->gfx_level >= GFX10 && program->gfx_level <= GFX10_3);
    assert(program->wave_size == 64);
 
    unsigned shared_vgpr_reg_0 = align(program->config->num_vgprs, 4) + 256;
@@ -907,8 +907,9 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
    /* Restore saved EXEC */
    bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(tmp_exec.physReg(), s2));
 
-   /* RA assumes that the result is always in the low part of the register, so we have to shift, if
-    * it's not there already */
+   /* RA assumes that the result is always in the low part of the register, so we have to shift,
+    * if it's not there already.
+    */
    if (input_data.physReg().byte()) {
       unsigned right_shift = input_data.physReg().byte() * 8;
       bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),