aco: add emission support for register-allocated sdwa sels
author: Rhys Perry <pendingchaos02@gmail.com>
Fri, 7 Feb 2020 12:08:09 +0000 (12:08 +0000)
committer: Daniel Schürmann <daniel@schuermann.dev>
Fri, 3 Apr 2020 22:13:15 +0000 (23:13 +0100)
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>

src/amd/compiler/aco_assembler.cpp
src/amd/compiler/aco_ir.h

index 33bc612..8e89568 100644 (file)
@@ -28,6 +28,18 @@ struct asm_context {
    int subvector_begin_pos = -1;
 };
 
+static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg) /* maps an sdwa_sel value plus the assigned register to the selector bits that callers shift and OR into the encoding */
+{
+   if (sel & sdwa_isra) { /* register-allocated selection: the byte/word index comes from reg, not from sel */
+      unsigned size = sdwa_rasize & sel; /* low two bits of sel: 1 for sdwa_ubyte/sdwa_sbyte, 2 for sdwa_uword/sdwa_sword */
+      if (size == 1)
+         return reg.byte(); /* byte selection; presumably reg.byte() is the byte offset within the dword -- TODO confirm against PhysReg */
+      else /* size == 2 */
+         return sdwa_isword | (reg.byte() >> 1); /* word selection: word flag plus the halved reg.byte() value as the word index */
+   }
+   return sel & sdwa_asuint; /* fixed selection: keep only the masked bits of sel; the sdwa_sext flag (0x8) lies outside the mask */
+}
+
 void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
 {
    uint32_t instr_offset = out.size() * 4u;
@@ -578,20 +590,22 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
             }
             encoding |= (sdwa->clamp ? 1 : 0) << 13;
          } else {
-            encoding |= (uint32_t)(sdwa->dst_sel & sdwa_asuint) << 8;
+            encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8;
             uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0;
+            if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra))
+               dst_u = 2;
             encoding |= dst_u << 11;
             encoding |= (sdwa->clamp ? 1 : 0) << 13;
             encoding |= sdwa->omod << 14;
          }
 
-         encoding |= (uint32_t)(sdwa->sel[0] & sdwa_asuint) << 16;
+         encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16;
          encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0;
          encoding |= sdwa->abs[0] << 21;
          encoding |= sdwa->neg[0] << 20;
 
          if (instr->operands.size() >= 2) {
-            encoding |= (uint32_t)(sdwa->sel[1] & sdwa_asuint) << 24;
+            encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24;
             encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0;
             encoding |= sdwa->abs[1] << 29;
             encoding |= sdwa->neg[1] << 28;
index 2679fbd..7e010db 100644 (file)
@@ -880,11 +880,13 @@ enum sdwa_sel : uint8_t {
     /* masks */
     sdwa_wordnum = 0x1,
     sdwa_bytenum = 0x3,
-    sdwa_asuint = 0x7,
+    sdwa_asuint = 0x7 | 0x10,
+    sdwa_rasize = 0x3,
 
     /* flags */
     sdwa_isword = 0x4,
     sdwa_sext = 0x8,
+    sdwa_isra = 0x10,
 
     /* specific values */
     sdwa_ubyte0 = 0,
@@ -902,6 +904,12 @@ enum sdwa_sel : uint8_t {
     sdwa_sword0 = sdwa_uword0 | sdwa_sext,
     sdwa_sword1 = sdwa_uword1 | sdwa_sext,
     sdwa_sdword = sdwa_udword | sdwa_sext,
+
+    /* register-allocated */
+    sdwa_ubyte = 1 | sdwa_isra,
+    sdwa_uword = 2 | sdwa_isra,
+    sdwa_sbyte = sdwa_ubyte | sdwa_sext,
+    sdwa_sword = sdwa_uword | sdwa_sext,
 };
 
 /**
@@ -915,7 +923,7 @@ enum sdwa_sel : uint8_t {
 struct SDWA_instruction : public Instruction {
    /* these destination modifiers aren't available with VOPC except for
     * clamp on GFX8 */
-   unsigned dst_sel:4;
+   unsigned dst_sel:8;
    bool dst_preserve:1;
    bool clamp:1;
    unsigned omod:2; /* GFX9+ */