INC/DEC are pretty much the same as ADD/SUB except that they don't update the C flag.
This patch removes the special nodes and just pattern matches from ADD/SUB during isel if the C flag isn't being used.
I had to avoid selecting DEC if the result isn't used. This will become a SUB immediate which will be turned into a CMP later by optimizeCompareInstr. This led to the one test change where we use a CMP instead of a DEC for an overflow intrinsic since we only checked the flag.
This also exposed a hole in our RMW flag matching use of hasNoCarryFlagUses. Our root node for the match is a store and there's no guarantee that all the flag users have been selected yet. So hasNoCarryFlagUses needs to check copyToReg and machine opcodes, but it also needs to check for the pre-match SETCC, SETCC_CARRY, BRCOND, and CMOV opcodes.
Differential Revision: https://reviews.llvm.org/D55975
llvm-svn: 350245
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- bool UseIncDec = false;
- if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
- UseIncDec = true;
-
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
- if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
+ CondOpc == X86::SETOr) {
+ // We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
- bool IsDec = BaseOpc == X86ISD::DEC;
+ bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
return true;
}
+static bool mayUseCarryFlag(X86::CondCode CC) {
+ switch (CC) {
+ // Comparisons which don't examine the CF flag.
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_S: case X86::COND_NS:
+ case X86::COND_P: case X86::COND_NP:
+ case X86::COND_L: case X86::COND_GE:
+ case X86::COND_G: case X86::COND_LE:
+ return false;
+ // Anything else: assume conservatively.
+ default:
+ return true;
+ }
+}
+
/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
// Only check things that use the flags.
if (UI.getUse().getResNo() != Flags.getResNo())
continue;
- // Only examine CopyToReg uses that copy to EFLAGS.
- if (UI->getOpcode() != ISD::CopyToReg ||
- cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
- return false;
- // Examine each user of the CopyToReg use.
- for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
- FlagUI != FlagUE; ++FlagUI) {
- // Only examine the Flag result.
- if (FlagUI.getUse().getResNo() != 1)
- continue;
- // Anything unusual: assume conservatively.
- if (!FlagUI->isMachineOpcode())
- return false;
- // Examine the condition code of the user.
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
- switch (CC) {
- // Comparisons which don't examine the CF flag.
- case X86::COND_O: case X86::COND_NO:
- case X86::COND_E: case X86::COND_NE:
- case X86::COND_S: case X86::COND_NS:
- case X86::COND_P: case X86::COND_NP:
- case X86::COND_L: case X86::COND_GE:
- case X86::COND_G: case X86::COND_LE:
- continue;
- // Anything else: assume conservatively.
- default:
+ unsigned UIOpc = UI->getOpcode();
+
+ if (UIOpc == ISD::CopyToReg) {
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+ FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1)
+ continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode())
+ return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ if (mayUseCarryFlag(CC))
+ return false;
}
+
+ // This CopyToReg is ok. Move on to the next user.
+ continue;
+ }
+
+ // This might be an unselected node. So look for the pre-isel opcodes that
+ // use flags.
+ unsigned CCOpNo;
+ switch (UIOpc) {
+ default:
+ // Something unusual. Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
}
+
+ X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
+ if (mayUseCarryFlag(CC))
+ return false;
}
return true;
}
switch (Opc) {
default:
return false;
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::SUB:
case X86ISD::SBB:
break;
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::INC:
- case X86ISD::DEC: {
- unsigned NewOpc =
- Opc == X86ISD::INC
- ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
- : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- Result =
- CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
- break;
- }
case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
+ // Try to match inc/dec.
+ if (!Subtarget->slowIncDec() ||
+ CurDAG->getMachineFunction().getFunction().optForSize()) {
+ bool IsOne = isOneConstant(StoredVal.getOperand(1));
+ bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
+ // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
+ if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
+ unsigned NewOpc =
+ ((Opc == X86ISD::ADD) == IsOne)
+ ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+ : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
- case ISD::ADD:
- // We only want to rewrite this as a target-specific node with attached
- // flags if there is a reasonable chance of either using that to do custom
- // instructions selection that can fold some of the memory operands, or if
- // only the flags are used. If there are other uses, leave the node alone
- // and emit a test instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
-
- if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->isOne() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
-
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
- }
-
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
-
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better.
break;
LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Similar to ISD::ADD above, check if the uses will preclude useful
- // lowering of the target-specific node.
+ // Transform to an x86-specific ALU node with flags if there is a chance of
+ // using an RMW op or only the flags are used. Otherwise, leave
+ // the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
+ case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
break;
case X86ISD::ADD:
case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
- // A subtract of one will be selected as a INC. Note that INC doesn't
- // set CF, so we can't do this for UADDO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
Cond = X86::COND_B;
break;
case ISD::SSUBO:
- // A subtract of one will be selected as a DEC. Note that DEC doesn't
- // set CF, so we can't do this for USUBO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
- Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
- Opc == X86ISD::XOR || Opc == X86ISD::AND))
+ Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- bool AllowIncDec = true) {
+ const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- // Convert to inc/dec if they aren't slow or we are optimizing for size.
- if (AllowIncDec && (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- if ((NewOpc == X86ISD::LADD && C->isOne()) ||
- (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
- (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- }
- }
-
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::LINC: return "X86ISD::LINC";
- case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::INC: return "X86ISD::INC";
- case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- // If the comparision uses the CF flag we can't use INC/DEC instructions.
- bool NeedCF = false;
- switch (CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
- NeedCF = true;
- break;
- }
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL, UMUL,
- INC, DEC, OR, XOR, AND,
+ OR, XOR, AND,
// Bit field extract.
BEXTR,
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
+ LADD, LSUB, LOR, LXOR, LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
} // SchedRW
} // CodeSize
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string frag, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALURMW] in {
-def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))]>,
- LOCK;
-def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))]>,
- OpSize16, LOCK;
-def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))]>,
- OpSize32, LOCK;
-def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))]>,
- LOCK;
-}
-}
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-multiclass unary_atomic_intrin<SDNode atomic_op> {
- def _8 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
- }]>;
- def _16 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
- }]>;
- def _32 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
- }]>;
- def _64 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
- }]>;
-}
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
-defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
+let Predicates = [UseIncDec] in {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALURMW] in {
+ def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "inc{b}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "inc{w}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "inc{l}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "inc{q}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+
+ def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "dec{b}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "dec{w}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "dec{l}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "dec{q}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+ }
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+ // Additional patterns for -1 constant.
+ def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
+}
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
}
// or reg/reg.
def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
-def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
define i1 @func3(i32 %x) nounwind {
; CHECK-LABEL: func3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: decl %eax
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: seto %al
; CHECK-NEXT: retl
entry: