From 97d1bc44544f27f7e8175588e24394ab68c5d521 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 1 Apr 2019 13:36:40 +0000 Subject: [PATCH] [InstCombine] eliminate commuted select-shuffles + binop (PR41304) If we have a commutable vector binop with inverted select-shuffles, we don't care about the order of the operands in each vector lane: LHS = shuffle V1, V2, <0, 5, 6, 3> RHS = shuffle V2, V1, <0, 5, 6, 3> LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 PR41304: https://bugs.llvm.org/show_bug.cgi?id=41304 ...is currently titled as an SLP enhancement, but at least for the given example, we can reduce that in instcombine because we are just eliminating shuffles. As noted in the TODO, this could be generalized, but I haven't thought through those patterns completely, so this is limited to what appears to be always safe. Differential Revision: https://reviews.llvm.org/D60048 llvm-svn: 357382 --- .../InstCombine/InstructionCombining.cpp | 24 ++++++++ .../Transforms/InstCombine/vec-binop-select.ll | 71 ++++++++++++++++------ 2 files changed, 75 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 4cc81b9..230a429 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1415,6 +1415,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) { return createBinOpShuffle(V1, V2, Mask); } + // If both arguments of a commutative binop are select-shuffles that use the + // same mask with commuted operands, the shuffles are unnecessary. + if (Inst.isCommutative() && + match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) && + match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1), + m_Specific(Mask)))) { + auto *LShuf = cast(LHS); + auto *RShuf = cast(RHS); + // TODO: Allow shuffles that contain undefs in the mask? + // That is legal, but it reduces undef knowledge. 
+ // TODO: Allow arbitrary shuffles by shuffling after binop? + // That might be legal, but we have to deal with poison. + if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() && + RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) { + // Example: + // LHS = shuffle V1, V2, <0, 5, 6, 3> + // RHS = shuffle V2, V1, <0, 5, 6, 3> + // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 + Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2); + NewBO->copyIRFlags(&Inst); + return NewBO; + } + } + // If one argument is a shuffle within one vector and the other is a constant, // try moving the shuffle after the binary operation. This canonicalization // intends to move shuffles closer to other shuffles and binops closer to diff --git a/llvm/test/Transforms/InstCombine/vec-binop-select.ll b/llvm/test/Transforms/InstCombine/vec-binop-select.ll index 894684b..abf4729 100644 --- a/llvm/test/Transforms/InstCombine/vec-binop-select.ll +++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll @@ -5,9 +5,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -18,9 +16,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @or( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x 
i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -33,9 +29,7 @@ define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -48,13 +42,56 @@ define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @add( +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong operand + +define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: @add_wrong_op( ; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> - %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong mask (but we could handle this...) 
+ +define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @add_non_select_mask( +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong mask (but we could handle this...) + +define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @add_masks_with_undefs( +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> %r = add nsw <4 x i32> %sel1, %sel2 ret <4 x i32> %r } @@ -76,9 +113,7 @@ define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @mul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -180,9 +215,7 @@ define <4 x i32> @ashr(<4 x i32> %x, <4 x i32> %y) { define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x 
float> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -206,9 +239,7 @@ define <4 x float> @fsub(<4 x float> %x, <4 x float> %y) { define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x double> [[R]] ; %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> -- 2.7.4