aco: add new pseudo instruction p_jump_to_epilog
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 24 Jun 2022 12:17:29 +0000 (14:17 +0200)
committer Marge Bot <emma+marge@anholt.net>
Mon, 18 Jul 2022 18:40:02 +0000 (18:40 +0000)
The first operand of this new pseudo-instruction is a 64-bit SGPR for
the continue PC, followed by a variable-length list of fixed VGPRs for
the color exports, which are the PS epilog inputs.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17485>

src/amd/compiler/aco_insert_waitcnt.cpp
src/amd/compiler/aco_lower_to_hw_instr.cpp
src/amd/compiler/aco_opcodes.py
src/amd/compiler/aco_scheduler.cpp
src/amd/compiler/aco_validate.cpp

index b8be00c..3c04d30 100644 (file)
@@ -359,9 +359,10 @@ force_waitcnt(wait_ctx& ctx, wait_imm& imm)
 void
 kill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
 {
-   if (debug_flags & DEBUG_FORCE_WAITCNT) {
+   if (instr->opcode == aco_opcode::s_setpc_b64 || (debug_flags & DEBUG_FORCE_WAITCNT)) {
       /* Force emitting waitcnt states right after the instruction if there is
-       * something to wait for.
+       * something to wait for. This is also applied for s_setpc_b64 to ensure
+       * waitcnt states are inserted before jumping to the PS epilog.
        */
       force_waitcnt(ctx, imm);
    }
index 1920dfb..6250b7f 100644 (file)
@@ -2368,6 +2368,10 @@ lower_to_hw_instr(Program* program)
                }
                break;
             }
+            case aco_opcode::p_jump_to_epilog: {
+               bld.sop1(aco_opcode::s_setpc_b64, instr->operands[0]);
+               break;
+            }
             default: break;
             }
          } else if (instr->isBranch()) {
index 820e09b..cb3c731 100644 (file)
@@ -320,6 +320,8 @@ opcode("p_insert") # src1=index, src2=bits
 
 opcode("p_init_scratch")
 
+# jumps to a shader epilog
+opcode("p_jump_to_epilog")
 
 # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
 SOP2 = {
index 0edd786..6cebcf9 100644 (file)
@@ -574,7 +574,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
    /* don't move non-reorderable instructions */
    if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
        instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
-       instr->opcode == aco_opcode::p_init_scratch)
+       instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog)
       return hazard_fail_unreorderable;
 
    memory_event_set instr_set;
index db013e1..72a033a 100644 (file)
@@ -261,6 +261,7 @@ validate_ir(Program* program)
                bool flat = instr->isFlatLike();
                bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
                                    instr->opcode == aco_opcode::p_create_vector ||
+                                   instr->opcode == aco_opcode::p_jump_to_epilog ||
                                    (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
                                    ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
                                    (instr->isScratch() && i == 0);
@@ -511,6 +512,18 @@ validate_ir(Program* program)
                unsigned comp = data_bits / MAX2(op_bits, 1);
                check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
                      instr.get());
+            } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
+               check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
+                     instr.get());
+               check(instr->operands.size() > 0 &&
+                        instr->operands[0].getTemp().type() == RegType::sgpr &&
+                        instr->operands[0].getTemp().size() == 2,
+                     "First operand of p_jump_to_epilog must be a SGPR", instr.get());
+               for (unsigned i = 1; i < instr->operands.size(); i++) {
+                  check(instr->operands[i].getTemp().type() == RegType::vgpr ||
+                           instr->operands[i].isUndefined(),
+                        "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
+               }
             }
             break;
          }