setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::ADD:
+ return combineADD(N, DCI);
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
return SDValue();
}
+// Fold an i64 add of a zero-extended (in)equality compare against a constant
+// into a carry-based sequence:
+//   (add X, (zext (setne Z, C))) -> (addze X, (addic  (addi Z, -C), -1))
+//   (add X, (zext (sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
+// When C is zero, (addi Z, -C) simplifies to Z and no addi is created.
+// Requirements: 64-bit subtarget, X and Z are MVT::i64, -C in [-32768, 32767],
+// and both the zext and the setcc have exactly one use.
+// Operands of the add may appear in either order.
+// Returns the replacement value, or an empty SDValue when N does not match.
+static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
+                                 const PPCSubtarget &Subtarget) {
+  if (!Subtarget.isPPC64())
+    return SDValue();
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Matches (zext i64 (setcc i64 Z, Constant)) where the negated constant fits
+  // in a signed 16-bit immediate and neither node has other users.
+  auto isZextOfCompareWithConstant = [](SDValue Op) {
+    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
+        Op.getValueType() != MVT::i64)
+      return false;
+
+    SDValue Cmp = Op.getOperand(0);
+    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
+        Cmp.getOperand(0).getValueType() != MVT::i64)
+      return false;
+
+    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
+      int64_t NegConstant = 0 - Constant->getSExtValue();
+      // Due to the limitations of the addi instruction,
+      // -C is required to be [-32768, 32767].
+      return isInt<16>(NegConstant);
+    }
+
+    return false;
+  };
+
+  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
+  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
+
+  // If there is a pattern, canonicalize a zext operand to the RHS.
+  if (LHSHasPattern && !RHSHasPattern)
+    std::swap(LHS, RHS);
+  else if (!LHSHasPattern && !RHSHasPattern)
+    return SDValue();
+
+  SDValue Cmp = RHS.getOperand(0);
+
+  // Only equality / inequality can be expressed through the carry bit here.
+  // Bail out before creating any node for other predicates.
+  const ISD::CondCode CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+  if (CC != ISD::SETNE && CC != ISD::SETEQ)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue Z = Cmp.getOperand(0);
+  // The lambda above already established that operand 1 is a constant, so
+  // cast<> (which asserts in debug builds) is sufficient.
+  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
+  int64_t NegConstant = 0 - Constant->getSExtValue();
+
+  // Diff = Z - C. Only materialize the add when C != 0, so no dead ADD node is
+  // left behind for the C == 0 case.
+  SDValue Diff = Z;
+  if (NegConstant != 0)
+    Diff = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
+                       DAG.getConstant(NegConstant, DL, MVT::i64));
+
+  // ADDC/SUBC/ADDE all produce (value, carry); the carry result is glue, not
+  // an i64 value.
+  SDVTList CarryVTs = DAG.getVTList(MVT::i64, MVT::Glue);
+
+  //                                 when C == 0
+  //                             --> addze X, (addic Z, -1).carry
+  //                            /
+  // add X, (zext(setne Z, C))--
+  //                            \    when -32768 <= -C <= 32767 && C != 0
+  //                             --> addze X, (addic (addi Z, -C), -1).carry
+  //
+  //                                 when C == 0
+  //                             --> addze X, (subfic Z, 0).carry
+  //                            /
+  // add X, (zext(sete Z, C))--
+  //                            \    when -32768 <= -C <= 32767 && C != 0
+  //                             --> addze X, (subfic (addi Z, -C), 0).carry
+  SDValue CarryDef =
+      CC == ISD::SETNE
+          ? DAG.getNode(ISD::ADDC, DL, CarryVTs, Diff,
+                        DAG.getConstant(-1ULL, DL, MVT::i64))
+          : DAG.getNode(ISD::SUBC, DL, CarryVTs,
+                        DAG.getConstant(0, DL, MVT::i64), Diff);
+
+  // X + 0 + carry, consuming the carry (result #1) of the node built above.
+  return DAG.getNode(ISD::ADDE, DL, CarryVTs, LHS,
+                     DAG.getConstant(0, DL, MVT::i64),
+                     SDValue(CarryDef.getNode(), 1));
+}
+
+// Target-specific DAG combine entry point for ISD::ADD nodes.
+// The only rewrite attempted is the ADDZE carry-based fold; an empty SDValue
+// is returned when that fold does not apply.
+SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
+  return combineADDToADDZE(N, DCI.DAG, Subtarget);
+}
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
--- /dev/null
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
+
+; X + zext(Z != 0): the compared constant is zero, so the expected code is
+; addic/addze with no preceding addi.
+define i64 @addze1(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addic [[REG1:r[0-9]+]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Z, 0
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z == 0): the compared constant is zero, so the expected code is
+; subfic/addze with no preceding addi.
+define i64 @addze2(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subfic [[REG1:r[0-9]+]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Z, 0
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z != 32768): the negated constant is -32768, the smallest value
+; accepted by the combine; expects addi/addic/addze.
+define i64 @addze3(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
+; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Z, 32768
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z == 32768): the negated constant is -32768, the smallest value
+; accepted by the combine; expects addi/subfic/addze.
+define i64 @addze4(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
+; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Z, 32768
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z != -32767): the negated constant is 32767, the largest value
+; accepted by the combine; expects addi/addic/addze.
+define i64 @addze5(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
+; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Z, -32767
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z == -32767): the negated constant is 32767, the largest value
+; accepted by the combine; expects addi/subfic/addze.
+define i64 @addze6(i64 %X, i64 %Z) {
+; CHECK-LABEL: addze6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
+; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
+; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Z, -32767
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
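+; Negative tests: the negated constant (-C) is outside [-32768, 32767], so the
+; add must not be turned into an addze sequence.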
+; X + zext(Z != -32768): the negated constant is 32768, one past the accepted
+; maximum; expects the li/xor/addic/subfe/add sequence with no addze.
+define i64 @test1(i64 %X, i64 %Z) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Z, -32768
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z == -32768): the negated constant is 32768, one past the accepted
+; maximum; expects the li/xor/cntlzd/rldicl/add sequence with no addze.
+define i64 @test2(i64 %X, i64 %Z) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Z, -32768
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z != 32769): the negated constant is -32769, one below the accepted
+; minimum; expects the li/ori/xor/addic/subfe/add sequence with no addze.
+define i64 @test3(i64 %X, i64 %Z) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
+; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Z, 32769
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Z == 32769): the negated constant is -32769, one below the accepted
+; minimum; expects the li/ori/xor/cntlzd/rldicl/add sequence with no addze.
+define i64 @test4(i64 %X, i64 %Z) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
+; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Z, 32769
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
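+; Negative tests: the compare's right-hand operand is a register rather than a
+; constant, so the add must not be turned into an addze sequence.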
+; X + zext(Y != Z) with both compare operands in registers; expects the
+; xor/addic/subfe/add sequence with no addze.
+define i64 @test5(i64 %X, i64 %Y, i64 %Z) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
+; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
+; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp ne i64 %Y, %Z
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}
+
+; X + zext(Y == Z) with both compare operands in registers; expects the
+; xor/cntlzd/rldicl/add sequence with no addze.
+define i64 @test6(i64 %X, i64 %Y, i64 %Z) {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
+; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
+; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
+; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: blr
+ %cmp = icmp eq i64 %Y, %Z
+ %conv1 = zext i1 %cmp to i64
+ %add = add nsw i64 %conv1, %X
+ ret i64 %add
+}