From e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 28 Sep 2022 11:22:52 -0400 Subject: [PATCH] [InstCombine] fold select shuffles with shared operand together We don't combine generic shuffles together in IR, but select shuffles are a special-case because a select shuffle of a select shuffle is just another select shuffle; codegen is expected to efficiently lower those (select shuffles are also the canonical form of a vector select with constant condition). --- .../InstCombine/InstCombineVectorOps.cpp | 50 ++++++++++++++++++++++ llvm/test/Transforms/InstCombine/shuffle_select.ll | 26 ++++++----- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 9a144de..40acc24 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1961,6 +1961,53 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) { return {}; } +/// A select shuffle of a select shuffle with a shared operand can be reduced +/// to a single select shuffle. This is an obvious improvement in IR, and the +/// backend is expected to lower select shuffles efficiently. +static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) { + assert(Shuf.isSelect() && "Must have select-equivalent shuffle"); + + Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); + SmallVector Mask; + Shuf.getShuffleMask(Mask); + unsigned NumElts = Mask.size(); + + // Canonicalize a select shuffle with common operand as Op1. + auto *ShufOp = dyn_cast(Op0); + if (ShufOp && ShufOp->isSelect() && + (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) { + std::swap(Op0, Op1); + ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + } + + ShufOp = dyn_cast(Op1); + if (!ShufOp || !ShufOp->isSelect() || + (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0)) + return nullptr; + + Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1); + SmallVector Mask1; + ShufOp->getShuffleMask(Mask1); + assert(Mask1.size() == NumElts && "Vector size changed with select shuffle"); + + // Canonicalize common operand (Op0) as X (first operand of first shuffle). + if (Y == Op0) { + std::swap(X, Y); + ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts); + } + + // If the mask chooses from X (operand 0), it stays the same. + // If the mask chooses from the earlier shuffle, the other mask value is + // transferred to the combined select shuffle: + // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M' + SmallVector NewMask(NumElts); + for (unsigned i = 0; i != NumElts; ++i) + NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i]; + + assert(ShuffleVectorInst::isSelectMask(NewMask) && "Unexpected shuffle mask"); + return new ShuffleVectorInst(X, Y, NewMask); +} + static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { assert(Shuf.isSelect() && "Must have select-equivalent shuffle"); @@ -2065,6 +2112,9 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) { return &Shuf; } + if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf)) + return I; + if (Instruction *I = foldSelectShuffleWith1Binop(Shuf)) return I; diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 9133a1f..357b8e1 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1528,10 +1528,12 @@ define <4 x i32> @PR41419(<4 x i32> %v) { ret <4 x i32> %s } +; The shuffle masks in the next 4 tests are identical to make it easier +; to see that we are choosing the correct elements in the new shuffle. + define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute0( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> @@ -1541,8 +1543,7 @@ define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) { define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute1( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> @@ -1552,8 +1553,7 @@ define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) { define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute2( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> @@ -1563,8 +1563,7 @@ define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) { define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute3( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> @@ -1574,8 +1573,7 @@ define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) { define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> @@ -1583,6 +1581,8 @@ define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> % ret <5 x i4> %s2 } +; negative test - need shared operand + define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) { ; CHECK-LABEL: @sel_not_common_op_commute3( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> @@ -1594,6 +1594,8 @@ define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> % ret <5 x i4> %s2 } +; negative test - need "select" shuffle, no lane changes + define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @not_sel_common_op( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> @@ -1605,11 +1607,13 @@ define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) { ret <5 x i4> %s2 } +; extra use is ok + define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @sel_common_op_extra_use( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[S1]]) -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S2]] ; %s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> -- 2.7.4