From 31d839aacc63831e7e13c46d5070672fb09ff844 Mon Sep 17 00:00:00 2001
From: Tatsuyuki Ishi
Date: Mon, 29 Nov 2021 00:12:18 +0900
Subject: [PATCH] aco: lower masked swizzle to DPP8

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 04adddd..bd9eff7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -243,7 +243,6 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
 
       uint16_t dpp_ctrl = 0xffff;
 
-      // TODO: we could use DPP8 for some swizzles
       if (and_mask == 0x1f && or_mask < 4 && xor_mask < 4) {
          unsigned res[4] = {0, 1, 2, 3};
          for (unsigned i = 0; i < 4; i++)
@@ -255,6 +254,13 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
          dpp_ctrl = dpp_row_mirror;
       } else if (and_mask == 0x1f && !or_mask && xor_mask == 0x7) {
          dpp_ctrl = dpp_row_half_mirror;
+      } else if (ctx->options->chip_class >= GFX10 && (and_mask & 0x18) == 0x18 && or_mask < 8 && xor_mask < 8) {
+         // DPP8 comes last, as it does not allow several modifiers like `abs` that are available with DPP16
+         Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
+         for (unsigned i = 0; i < 8; i++) {
+            ret.instr->dpp8().lane_sel[i] = (((i & and_mask) | or_mask) ^ xor_mask) & 0x7;
+         }
+         return ret;
       }
 
       if (dpp_ctrl != 0xffff)
-- 
2.7.4