From e59d06fe78fd3ec58a247b7af42b59013a61ac5e Mon Sep 17 00:00:00 2001
From: Tim Shen
Date: Wed, 3 May 2017 00:07:02 +0000
Subject: [PATCH] [PowerPC, DAGCombiner] Fold a << (b % (sizeof(a) * 8)) back
 to a single instruction

Summary: This is the corresponding llvm change to D28037 to ensure no performance regression.

Reviewers: bogner, kbarton, hfinkel, iteratee, echristo

Subscribers: nemanjai, llvm-commits

Differential Revision: https://reviews.llvm.org/D28329

llvm-svn: 301990
---
 llvm/include/llvm/Target/TargetLowering.h     |  8 ++++++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 +++++++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  8 ++++++
 llvm/test/CodeGen/PowerPC/shift_mask.ll       | 39 ---------------------------
 4 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index aa92300..ced1838 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -2061,6 +2061,14 @@ public:
     return false;
   }
 
+  // Return true if the instruction that performs a << b actually performs
+  // a << (b % (sizeof(a) * 8)).
+  virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
+    assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+           "Expect a shift instruction");
+    return false;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc0276d..3cd5573 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5294,6 +5294,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     }
   }
 
+  // If the target supports masking y in (shl, y),
+  // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y)
+  if (TLI.isOperationLegal(ISD::SHL, VT) &&
+      TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   ConstantSDNode *N1C = isConstOrConstSplat(N1);
 
   // fold (shl c1, c2) -> c1<<c2
@@ -????,6 +????,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
+  // If the target supports masking y in (sra, y),
+  // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y)
+  if (TLI.isOperationLegal(ISD::SRA, VT) &&
+      TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   // Arithmetic shifting an all-sign-bit value is a no-op.
   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
     return N0;
@@ -5650,6 +5672,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
+  // If the target supports masking y in (srl, y),
+  // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y)
+  if (TLI.isOperationLegal(ISD::SRL, VT) &&
+      TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
+    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+      if (Mask->getZExtValue() == OpSizeInBits - 1) {
+        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
+      }
+    }
+  }
+
   // fold vector ops
   if (VT.isVector())
     if (SDValue FoldedVOp = SimplifyVBinOp(N))
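A minimal standalone C++ sketch, not part of the patch (the helper name hw_shl32 is made up for illustration), of why the new combines are sound: when supportsModuloShift() holds, the target's shift instruction already reduces its amount modulo the bit width, so the IR-level AND with OpSizeInBits - 1 computes exactly what the hardware would compute anyway and can be dropped.

    #include <cassert>
    #include <cstdint>

    // Model of a 32-bit shift unit that masks its amount, which is the
    // guarantee supportsModuloShift(ISD::SHL, VT) expresses about the target.
    static uint32_t hw_shl32(uint32_t x, uint32_t amt) {
      return x << (amt & 31);
    }

    int main() {
      const uint32_t x = 0xdeadbeefu;
      for (uint32_t amt = 0; amt < 256; ++amt)
        // Left: the masking shift unit fed the unmasked amount.
        // Right: the pre-fold IR pattern, a shift by (amt & 31).
        assert(hw_shl32(x, amt) == (x << (amt & 31)));
      return 0;
    }

The same argument covers the SRA and SRL copies of the combine; each copy fires only when the shift is legal for VT and the AND mask is exactly OpSizeInBits - 1.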
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 5645fdc..3266109 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1017,6 +1017,14 @@ namespace llvm {
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
+
+    bool supportsModuloShift(ISD::NodeType Inst,
+                             EVT ReturnType) const override {
+      assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+             "Expect a shift instruction");
+      assert(isOperationLegal(Inst, ReturnType));
+      return ReturnType.isVector();
+    }
   };
 
   namespace PPC {
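A rough per-lane model of why the PowerPC override returns true only for vector types, written as a sketch under the assumption (not stated in the patch itself) that the Altivec element shifts such as vslw use only the low-order bits of each lane of the amount operand:

    #include <array>
    #include <cstdint>

    using V4i32 = std::array<uint32_t, 4>;

    // Assumed vslw-like behaviour: each 32-bit lane of A is shifted left by
    // the low 5 bits of the matching lane of B.
    static V4i32 vslw_model(const V4i32 &A, const V4i32 &B) {
      V4i32 R{};
      for (int i = 0; i < 4; ++i)
        R[i] = A[i] << (B[i] & 31); // the amount is masked lane by lane
      return R;
    }

Under that model, pre-masking the amount vector with a splat of 31 cannot change the result, which is what lets the combine drop the vspltis*/xxland sequences removed in the test diff below. The assert(isOperationLegal(Inst, ReturnType)) mirrors the DAGCombiner call sites, which check legality before querying the hook.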
diff --git a/llvm/test/CodeGen/PowerPC/shift_mask.ll b/llvm/test/CodeGen/PowerPC/shift_mask.ll
index 91226a3..e9ca9b0 100644
--- a/llvm/test/CodeGen/PowerPC/shift_mask.ll
+++ b/llvm/test/CodeGen/PowerPC/shift_mask.ll
@@ -49,8 +49,6 @@ define i64 @test003(i64 %a, i64 %b) {
 define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test010:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vslb 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -61,8 +59,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test011:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vslh 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -73,10 +69,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test012:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vslw 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -87,11 +79,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test013:
 ; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsld 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -148,8 +135,6 @@ define i64 @test103(i64 %a, i64 %b) {
 define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test110:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrb 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -160,8 +145,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test111:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrh 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -172,10 +155,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test112:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrw 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -186,11 +165,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test113:
 ; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrd 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -247,8 +221,6 @@ define i64 @test203(i64 %a, i64 %b) {
 define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: test210:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrab 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -259,8 +231,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test211:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrah 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -271,10 +241,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
 define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test212:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsraw 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -285,11 +251,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test213:
 ; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
 ; CHECK-NEXT: vsrad 2, 2, 3
 ; CHECK-NEXT: blr
   %rem = and <2 x i64> %b, <i64 63, i64 63>
-- 
2.7.4
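For context, a hypothetical source-level fragment (assuming Clang's GCC-style vector extensions with mixed vector/scalar operands; the names v16i8 and shift_left_mod are invented) of the kind of code that produces the pattern exercised by test010: under optimization the per-lane '% 8' should become an AND with a splat of 7, which the new combine folds away so that only vslb remains.

    // Hypothetical example; not part of the test file.
    typedef unsigned char v16i8 __attribute__((vector_size(16)));

    v16i8 shift_left_mod(v16i8 a, v16i8 b) {
      // Per-lane shift amount reduced modulo the 8-bit lane width.
      return a << (b % 8);
    }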