aco: lower masked swizzle to DPP8

author Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
Sun, 28 Nov 2021 15:12:18 +0000 (00:12 +0900)

committer Marge Bot <emma+marge@anholt.net>
Fri, 31 Dec 2021 20:56:39 +0000 (20:56 +0000)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index 04adddd..bd9eff7 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -243,7 +243,6 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
  
        uint16_t dpp_ctrl = 0xffff;
  
-      // TODO: we could use DPP8 for some swizzles
        if (and_mask == 0x1f && or_mask < 4 && xor_mask < 4) {
           unsigned res[4] = {0, 1, 2, 3};
           for (unsigned i = 0; i < 4; i++)
@@ -255,6 +254,13 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
           dpp_ctrl = dpp_row_mirror;
        } else if (and_mask == 0x1f && !or_mask && xor_mask == 0x7) {
           dpp_ctrl = dpp_row_half_mirror;
+      } else if (ctx->options->chip_class >= GFX10 && (and_mask & 0x18) == 0x18 && or_mask < 8 && xor_mask < 8) {
+         // DPP8 comes last, as it does not allow several modifiers like `abs` that are available with DPP16
+         Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
+         for (unsigned i = 0; i < 8; i++) {
+            ret.instr->dpp8().lane_sel[i] = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7;
+         }
+         return ret;
        }
  
        if (dpp_ctrl != 0xffff)
author	Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
	Sun, 28 Nov 2021 15:12:18 +0000 (00:12 +0900)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 31 Dec 2021 20:56:39 +0000 (20:56 +0000)