return false;
}
+
+/// Build a GPR-only replacement for the SETCC node \p N using a subtraction.
+///
+/// This is called once the caller has proved that the comparison can be
+/// replaced by a subtraction (plus supporting instructions) so that the result
+/// is kept in a GPR instead of a CR bit. It is purely a codegen helper; the
+/// flags select the exact sequence. Both operands are zero-extended to a
+/// \p Size-bit integer and subtracted. Because the operands must be strictly
+/// narrower than \p Size bits, the most significant bit of the difference is
+/// set exactly when the first subtraction operand is smaller than the second.
+/// That bit is shifted down to bit 0, optionally inverted, and truncated to i1.
+///
+/// \param N          The ISD::SETCC node whose two operands are compared.
+/// \param Size       Width in bits of the integer type used for the
+///                   subtraction; must be wider than the SETCC operands.
+/// \param Complement If true, invert the extracted bit (XOR with 1).
+/// \param Swap       If true, compute Op1 - Op0 instead of Op0 - Op1.
+/// \param DL         Debug location attached to every node created here.
+/// \param DAG        The DAG in which the new nodes are created.
+/// \returns An i1 value holding bit 0 of the (optionally complemented) shifted
+///          difference.
+static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
+                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // Perform the arithmetic in the Size-bit integer type so that the type width
+  // always agrees with the (Size - 1) shift amount used below.
+  const MVT WideVT = MVT::getIntegerVT(Size);
+  assert(WideVT.isInteger() && "Size must name a simple integer type.");
+
+  // Zero extend the operands to the wide type. Originally, they must be of a
+  // strictly smaller size. ISD::ZERO_EXTEND is a unary node, so only the value
+  // being extended is passed as an operand.
+  SDValue Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(0));
+  SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, N->getOperand(1));
+
+  // Swap if needed. Depends on the condition code.
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // Subtract extended integers.
+  SDValue SubNode = DAG.getNode(ISD::SUB, DL, WideVT, Op0, Op1);
+
+  // Move the sign bit to the least significant position and zero out the rest.
+  // Now the least significant bit carries the result of original comparison.
+  SDValue Final = DAG.getNode(ISD::SRL, DL, WideVT, SubNode,
+                              DAG.getConstant(Size - 1, DL, MVT::i32));
+
+  // Complement the result if needed. Based on the condition code.
+  if (Complement)
+    Final = DAG.getNode(ISD::XOR, DL, WideVT, Final,
+                        DAG.getConstant(1, DL, WideVT));
+
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
+/// Try to replace the SETCC node \p N with an equivalent subtraction sequence.
+///
+/// The rewrite is attempted only when all of the following hold:
+///   - DCI.isAfterLegalizeVectorOps() is true,
+///   - every user of the SETCC is an ISD::ZERO_EXTEND,
+///   - the first operand is strictly narrower than the largest legal integer
+///     type reported by the data layout, and
+///   - the condition code is one of SETULT, SETULE, SETUGT or SETUGE.
+///
+/// \returns The replacement value, or an empty SDValue if nothing was changed.
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+  // The width of the compared integers is central to the analysis below, so
+  // wait until the combiner reports that vector ops have been legalized.
+  if (!DCI.isAfterLegalizeVectorOps())
+    return SDValue();
+
+  // Give up as soon as one user of the SETCC is not a zero extension.
+  SDNode::use_iterator UseIt = N->use_begin();
+  SDNode::use_iterator UseEnd = N->use_end();
+  while (UseIt != UseEnd) {
+    if (UseIt->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+    ++UseIt;
+  }
+
+  SelectionDAG &DAG = DCI.DAG;
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  auto OperandBits = N->getOperand(0).getValueSizeInBits();
+  unsigned WideBits = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+
+  // The subtraction needs spare high bits, so the operands must be narrower
+  // than the type the subtraction is carried out in.
+  if (!(OperandBits < WideBits))
+    return SDValue();
+
+  // Translate the unsigned condition code into the two knobs understood by
+  // generateEquivalentSub; any other condition code is left alone.
+  bool Complement = false;
+  bool Swap = false;
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETULT:
+    break;
+  case ISD::SETULE:
+    Complement = true;
+    Swap = true;
+    break;
+  case ISD::SETUGT:
+    Swap = true;
+    break;
+  case ISD::SETUGE:
+    Complement = true;
+    break;
+  }
+
+  SDLoc DL(N);
+  return generateEquivalentSub(N, WideBits, Complement, Swap, DL, DAG);
+}
+
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
APInt::getHighBitsSet(OpBits, OpBits-1)) ||
!DAG.MaskedValueIsZero(N->getOperand(1),
APInt::getHighBitsSet(OpBits, OpBits-1)))
- return SDValue();
+ return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+ : SDValue());
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+ /// ConvertSETCCToSubtract - Looks at a SETCC that compares integers and
+ /// replaces it with an integer subtraction when (1) that is legal and (2)
+ /// keeping the result in a GPR is beneficial; else returns an empty SDValue.
+ SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr8 < %s | FileCheck %s
+
+%class.PB2 = type { [1 x i32], %class.PB1* }
+%class.PB1 = type { [1 x i32], i64, i64, i32 }
+
+; Function Attrs: norecurse nounwind readonly
+; test1: unsigned less-than (icmp ult) of two i32 loads masked with 8. The
+; expected code is rlwinm, rlwinm, sub and rldicl into r3 on consecutive lines.
+define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+ %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+ %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+ %and.i = and i32 %0, 8
+ %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+ %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+ %and.i4 = and i32 %1, 8
+ %cmp.i5 = icmp ult i32 %and.i, %and.i4
+ ret i1 %cmp.i5
+
+; CHECK-LABEL: @test1
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+; test2: unsigned less-or-equal (icmp ule) of two i32 loads masked with 8. The
+; expected code is rlwinm, rlwinm, sub with swapped operands, rldicl, then
+; xori with 1 into r3, all on consecutive lines.
+define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+ %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+ %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+ %and.i = and i32 %0, 8
+ %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+ %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+ %and.i4 = and i32 %1, 8
+ %cmp.i5 = icmp ule i32 %and.i, %and.i4
+ ret i1 %cmp.i5
+
+; CHECK-LABEL: @test2
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]+]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+; test3: unsigned greater-than (icmp ugt) of two i32 loads masked with 8. The
+; expected code is rlwinm, rlwinm, sub with swapped operands and rldicl into
+; r3 on consecutive lines.
+define zeroext i1 @test3(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+ %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+ %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+ %and.i = and i32 %0, 8
+ %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+ %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+ %and.i4 = and i32 %1, 8
+ %cmp.i5 = icmp ugt i32 %and.i, %and.i4
+ ret i1 %cmp.i5
+
+; CHECK-LABEL: @test3
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: rldicl 3, [[REG3]]
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+; test4: unsigned greater-or-equal (icmp uge) of two i32 loads masked with 8.
+; The expected code is rlwinm, rlwinm, sub, rldicl, then xori with 1 into r3,
+; all on consecutive lines.
+define zeroext i1 @test4(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
+entry:
+ %arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
+ %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
+ %and.i = and i32 %0, 8
+ %arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
+ %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
+ %and.i4 = and i32 %1, 8
+ %cmp.i5 = icmp uge i32 %and.i, %and.i4
+ ret i1 %cmp.i5
+
+; CHECK-LABEL: @test4
+; CHECK: rlwinm [[REG1:[0-9]+]]
+; CHECK-NEXT: rlwinm [[REG2:[0-9]+]]
+; CHECK-NEXT: sub [[REG3:[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG4:[0-9]+]], [[REG3]]
+; CHECK-NEXT: xori 3, [[REG4]], 1
+; CHECK: blr
+
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}