aco: create hw_init_scratch() function for p_init_scratch lowering
authorDaniel Schürmann <daniel@schuermann.dev>
Thu, 9 Mar 2023 19:50:06 +0000 (20:50 +0100)
committerMarge Bot <emma+marge@anholt.net>
Thu, 16 Mar 2023 01:40:30 +0000 (01:40 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21780>

src/amd/compiler/aco_builder_h.py
src/amd/compiler/aco_lower_to_hw_instr.cpp

index 9ab0a2a..9a23867 100644 (file)
@@ -639,6 +639,8 @@ formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
 % endfor
 };
 
+void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);
+
 } // namespace aco
 
 #endif /* _ACO_BUILDER_ */"""
index 4bba62f..80db136 100644 (file)
@@ -2116,6 +2116,37 @@ emit_set_mode_from_block(Builder& bld, Program& program, Block* block, bool alwa
 }
 
 void
+hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset)
+{
+   /* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16
+    * bits in the same instruction that we add the carry.
+    */
+   Operand hi_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1));
+   Operand scratch_addr_lo(scratch_addr.physReg(), s1);
+   Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
+
+   if (bld.program->gfx_level >= GFX10) {
+      PhysReg scratch_lo = def.physReg();
+      PhysReg scratch_hi = def.physReg().advance(4);
+
+      bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo, s1), Definition(scc, s1),
+               scratch_addr_lo, scratch_offset);
+      bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi, s1), Definition(scc, s1),
+               scratch_addr_hi, hi_add, Operand(scc, s1));
+
+      /* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
+       * 20/21) */
+      bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_lo, s1), (31 << 11) | 20);
+      bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_hi, s1), (31 << 11) | 21);
+   } else {
+      bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
+               scratch_addr_lo, scratch_offset);
+      bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
+               scratch_addr_hi, hi_add, Operand(scc, s1));
+   }
+}
+
+void
 lower_to_hw_instr(Program* program)
 {
    Block* discard_block = NULL;
@@ -2466,39 +2497,13 @@ lower_to_hw_instr(Program* program)
                   break;
 
                Operand scratch_addr = instr->operands[0];
-               Operand scratch_addr_lo(scratch_addr.physReg(), s1);
                if (program->stage.hw != HWStage::CS) {
                   bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
                            Operand::zero());
-                  scratch_addr_lo.setFixed(instr->definitions[0].physReg());
-               }
-               Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1);
-
-               /* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16
-                * bits in the same instruction that we add the carry.
-                */
-               uint32_t hi_add = 0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1);
-
-               if (program->gfx_level >= GFX10) {
-                  Operand scratch_lo(instr->definitions[0].physReg(), s1);
-                  Operand scratch_hi(instr->definitions[0].physReg().advance(4), s1);
-
-                  bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo.physReg(), s1),
-                           Definition(scc, s1), scratch_addr_lo, instr->operands[1]);
-                  bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi.physReg(), s1),
-                           Definition(scc, s1), scratch_addr_hi, Operand::c32(hi_add),
-                           Operand(scc, s1));
-
-                  /* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register
-                   * 20/21) */
-                  bld.sopk(aco_opcode::s_setreg_b32, scratch_lo, (31 << 11) | 20);
-                  bld.sopk(aco_opcode::s_setreg_b32, scratch_hi, (31 << 11) | 21);
-               } else {
-                  bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1),
-                           scratch_addr_lo, instr->operands[1]);
-                  bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1),
-                           scratch_addr_hi, Operand::c32(hi_add), Operand(scc, s1));
+                  scratch_addr.setFixed(instr->definitions[0].physReg());
                }
+
+               hw_init_scratch(bld, instr->definitions[0], scratch_addr, instr->operands[1]);
                break;
             }
             case aco_opcode::p_jump_to_epilog: {