From 55c181a6c786cfbfa8b7aabe0a8ba721a65b1445 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 21 Feb 2022 10:52:09 +0000 Subject: [PATCH] Revert "[AArch64][GlobalISel] Optimize conjunctions of compares to conditional compares." This reverts commit 2a46450849de6904fc64f9a65303b20ca7fc9dbd. This triggers the following assertion in an internal project: Assertion failed: (VRegInfo[Reg.id()].first.is<const TargetRegisterClass *>() && "Register class not set, wrong accessor"), function getRegClass, file MachineRegisterInfo.h, line 646. I'll work with the author directly to get a reproducer. --- .../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 32 -- .../AArch64/GISel/AArch64InstructionSelector.cpp | 381 +-------------------- llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 260 ++++++++++---- 3 files changed, 194 insertions(+), 479 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 58fe482..7103656 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -14,7 +14,6 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H #define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H -#include "llvm/IR/Instructions.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -227,37 +226,6 @@ public: } }; -/// Represent a G_ICMP or G_FCMP. -class GAnyCmp : public GenericMachineInstr { -public: - CmpInst::Predicate getCond() const { - return static_cast(getOperand(1).getPredicate()); - } - Register getLHSReg() const { return getReg(2); } - Register getRHSReg() const { return getReg(3); } - - static bool classof(const MachineInstr *MI) { - return MI->getOpcode() == TargetOpcode::G_ICMP || - MI->getOpcode() == TargetOpcode::G_FCMP; - } -}; - -/// Represent a G_ICMP. 
-class GICmp : public GAnyCmp { -public: - static bool classof(const MachineInstr *MI) { - return MI->getOpcode() == TargetOpcode::G_ICMP; - } -}; - -/// Represent a G_FCMP. -class GFCmp : public GAnyCmp { -public: - static bool classof(const MachineInstr *MI) { - return MI->getOpcode() == TargetOpcode::G_FCMP; - } -}; - } // namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 0b06539..8a79d24 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -64,7 +63,6 @@ namespace { #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATE_BITSET - class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, @@ -296,20 +294,6 @@ private: emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; - /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). - /// In some cases this is even possible with OR operations in the expression. 
- MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, - MachineIRBuilder &MIB) const; - MachineInstr *emitConditionalComparison(Register LHS, Register RHS, - CmpInst::Predicate CC, - AArch64CC::CondCode Predicate, - AArch64CC::CondCode OutCC, - MachineIRBuilder &MIB) const; - MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, - bool Negate, Register CCOp, - AArch64CC::CondCode Predicate, - MachineIRBuilder &MIB) const; - /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. /// \p IsNegative is true if the test should be "not zero". /// This will also optimize the test bit instruction when possible. @@ -441,8 +425,7 @@ private: void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); // Optimization methods. - bool tryOptSelect(GSelect &Sel); - bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); + bool tryOptSelect(MachineInstr &MI); MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, MachineIRBuilder &MIRBuilder) const; @@ -1327,90 +1310,6 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { } } -/// changeFPCCToAArch64CC - Convert an IR fp condition code to an AArch64 CC. 
-static void changeFPCCToAArch64CC(CmpInst::Predicate CC, - AArch64CC::CondCode &CondCode, - AArch64CC::CondCode &CondCode2) { - CondCode2 = AArch64CC::AL; - switch (CC) { - default: - llvm_unreachable("Unknown FP condition!"); - case CmpInst::FCMP_OEQ: - CondCode = AArch64CC::EQ; - break; - case CmpInst::FCMP_OGT: - CondCode = AArch64CC::GT; - break; - case CmpInst::FCMP_OGE: - CondCode = AArch64CC::GE; - break; - case CmpInst::FCMP_OLT: - CondCode = AArch64CC::MI; - break; - case CmpInst::FCMP_OLE: - CondCode = AArch64CC::LS; - break; - case CmpInst::FCMP_ONE: - CondCode = AArch64CC::MI; - CondCode2 = AArch64CC::GT; - break; - case CmpInst::FCMP_ORD: - CondCode = AArch64CC::VC; - break; - case CmpInst::FCMP_UNO: - CondCode = AArch64CC::VS; - break; - case CmpInst::FCMP_UEQ: - CondCode = AArch64CC::EQ; - CondCode2 = AArch64CC::VS; - break; - case CmpInst::FCMP_UGT: - CondCode = AArch64CC::HI; - break; - case CmpInst::FCMP_UGE: - CondCode = AArch64CC::PL; - break; - case CmpInst::FCMP_ULT: - CondCode = AArch64CC::LT; - break; - case CmpInst::FCMP_ULE: - CondCode = AArch64CC::LE; - break; - case CmpInst::FCMP_UNE: - CondCode = AArch64CC::NE; - break; - } -} - -/// Convert an IR fp condition code to an AArch64 CC. -/// This differs from changeFPCCToAArch64CC in that it returns cond codes that -/// should be AND'ed instead of OR'ed. 
-static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, - AArch64CC::CondCode &CondCode, - AArch64CC::CondCode &CondCode2) { - CondCode2 = AArch64CC::AL; - switch (CC) { - default: - changeFPCCToAArch64CC(CC, CondCode, CondCode2); - assert(CondCode2 == AArch64CC::AL); - break; - case CmpInst::FCMP_ONE: - // (a one b) - // == ((a olt b) || (a ogt b)) - // == ((a ord b) && (a une b)) - CondCode = AArch64CC::VC; - CondCode2 = AArch64CC::NE; - break; - case CmpInst::FCMP_UEQ: - // (a ueq b) - // == ((a uno b) || (a oeq b)) - // == ((a ule b) && (a uge b)) - CondCode = AArch64CC::PL; - CondCode2 = AArch64CC::LE; - break; - } -} - /// Return a register which can be used as a bit to test in a TB(N)Z. static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI) { @@ -3393,18 +3292,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectCopy(I, TII, MRI, TRI, RBI); case TargetOpcode::G_SELECT: { - auto &Sel = cast(I); - if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) { + if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty << ", expected: " << LLT::scalar(1) << '\n'); return false; } - const Register CondReg = Sel.getCondReg(); - const Register TReg = Sel.getTrueReg(); - const Register FReg = Sel.getFalseReg(); + const Register CondReg = I.getOperand(1).getReg(); + const Register TReg = I.getOperand(2).getReg(); + const Register FReg = I.getOperand(3).getReg(); - if (tryOptSelect(Sel)) + if (tryOptSelect(I)) return true; // Make sure to use an unused vreg instead of wzr, so that the peephole @@ -3413,9 +3311,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); - if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB)) + if 
(!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) return false; - Sel.eraseFromParent(); + I.eraseFromParent(); return true; } case TargetOpcode::G_ICMP: { @@ -4804,263 +4702,7 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, } } -/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be -/// expressed as a conjunction. -/// \param CanNegate Set to true if we can negate the whole sub-tree just by -/// changing the conditions on the CMP tests. -/// (this means we can call emitConjunctionRec() with -/// Negate==true on this sub-tree) -/// \param MustBeFirst Set to true if this subtree needs to be negated and we -/// cannot do the negation naturally. We are required to -/// emit the subtree first in this case. -/// \param WillNegate Is true if are called when the result of this -/// subexpression must be negated. This happens when the -/// outer expression is an OR. We can use this fact to know -/// that we have a double negation (or (or ...) ...) that -/// can be implemented for free. -static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, - bool WillNegate, MachineRegisterInfo &MRI, - unsigned Depth = 0) { - if (!MRI.hasOneNonDBGUse(Val)) - return false; - MachineInstr *ValDef = MRI.getVRegDef(Val); - unsigned Opcode = ValDef->getOpcode(); - if (Opcode == TargetOpcode::G_TRUNC) { - // Look through a trunc. - Val = ValDef->getOperand(1).getReg(); - ValDef = MRI.getVRegDef(Val); - Opcode = ValDef->getOpcode(); - } - if (isa(ValDef)) { - CanNegate = true; - MustBeFirst = false; - return true; - } - // Protect against exponential runtime and stack overflow. 
- if (Depth > 6) - return false; - if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) { - bool IsOR = Opcode == TargetOpcode::G_OR; - Register O0 = ValDef->getOperand(1).getReg(); - Register O1 = ValDef->getOperand(2).getReg(); - bool CanNegateL; - bool MustBeFirstL; - if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1)) - return false; - bool CanNegateR; - bool MustBeFirstR; - if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1)) - return false; - - if (MustBeFirstL && MustBeFirstR) - return false; - - if (IsOR) { - // For an OR expression we need to be able to naturally negate at least - // one side or we cannot do the transformation at all. - if (!CanNegateL && !CanNegateR) - return false; - // If we the result of the OR will be negated and we can naturally negate - // the leafs, then this sub-tree as a whole negates naturally. - CanNegate = WillNegate && CanNegateL && CanNegateR; - // If we cannot naturally negate the whole sub-tree, then this must be - // emitted first. - MustBeFirst = !CanNegate; - } else { - assert(Opcode == TargetOpcode::G_AND && "Must be G_AND"); - // We cannot naturally negate an AND operation. - CanNegate = false; - MustBeFirst = MustBeFirstL || MustBeFirstR; - } - return true; - } - return false; -} - -MachineInstr *AArch64InstructionSelector::emitConditionalComparison( - Register LHS, Register RHS, CmpInst::Predicate CC, - AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, - MachineIRBuilder &MIB) const { - // TODO: emit CMN as an optimization. - auto &MRI = *MIB.getMRI(); - LLT OpTy = MRI.getType(LHS); - assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64); - unsigned CCmpOpc; - if (CmpInst::isIntPredicate(CC)) { - CCmpOpc = OpTy.getSizeInBits() == 32 ? 
AArch64::CCMPWr : AArch64::CCMPXr; - } else { - switch (OpTy.getSizeInBits()) { - case 16: - CCmpOpc = AArch64::FCCMPHrr; - break; - case 32: - CCmpOpc = AArch64::FCCMPSrr; - break; - case 64: - CCmpOpc = AArch64::FCCMPDrr; - break; - default: - return nullptr; - } - } - AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); - unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); - auto CCmp = - MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate); - constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI); - return &*CCmp; -} - -MachineInstr *AArch64InstructionSelector::emitConjunctionRec( - Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp, - AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const { - // We're at a tree leaf, produce a conditional comparison operation. - auto &MRI = *MIB.getMRI(); - MachineInstr *ValDef = MRI.getVRegDef(Val); - unsigned Opcode = ValDef->getOpcode(); - if (Opcode == TargetOpcode::G_TRUNC) { - // Look through a trunc. - Val = ValDef->getOperand(1).getReg(); - ValDef = MRI.getVRegDef(Val); - Opcode = ValDef->getOpcode(); - } - if (auto *Cmp = dyn_cast(ValDef)) { - Register LHS = Cmp->getLHSReg(); - Register RHS = Cmp->getRHSReg(); - CmpInst::Predicate CC = Cmp->getCond(); - if (Negate) - CC = CmpInst::getInversePredicate(CC); - // We only handle integer compares for now. - if (isa(Cmp)) { - OutCC = changeICMPPredToAArch64CC(CC); - } else { - // Handle special FP cases. - AArch64CC::CondCode ExtraCC; - changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC); - // Some floating point conditions can't be tested with a single condition - // code. Construct an additional comparison in this case. 
- if (ExtraCC != AArch64CC::AL) { - MachineInstr *ExtraCmp; - if (!CCOp) - ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC); - else - ExtraCmp = - emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB); - CCOp = ExtraCmp->getOperand(0).getReg(); - Predicate = ExtraCC; - } - } - - // Produce a normal comparison if we are first in the chain - if (!CCOp) { - auto Dst = MRI.cloneVirtualRegister(LHS); - if (isa(Cmp)) - return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB); - return emitFPCompare(Cmp->getOperand(2).getReg(), - Cmp->getOperand(3).getReg(), MIB); - } - // Otherwise produce a ccmp. - return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); - } - assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree"); - - bool IsOR = Opcode == TargetOpcode::G_OR; - - Register LHS = ValDef->getOperand(1).getReg(); - bool CanNegateL; - bool MustBeFirstL; - bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI); - assert(ValidL && "Valid conjunction/disjunction tree"); - (void)ValidL; - - Register RHS = ValDef->getOperand(2).getReg(); - bool CanNegateR; - bool MustBeFirstR; - bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI); - assert(ValidR && "Valid conjunction/disjunction tree"); - (void)ValidR; - - // Swap sub-tree that must come first to the right side. - if (MustBeFirstL) { - assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); - std::swap(LHS, RHS); - std::swap(CanNegateL, CanNegateR); - std::swap(MustBeFirstL, MustBeFirstR); - } - - bool NegateR; - bool NegateAfterR; - bool NegateL; - bool NegateAfterAll; - if (Opcode == TargetOpcode::G_OR) { - // Swap the sub-tree that we can negate naturally to the left. 
- if (!CanNegateL) { - assert(CanNegateR && "at least one side must be negatable"); - assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); - assert(!Negate); - std::swap(LHS, RHS); - NegateR = false; - NegateAfterR = true; - } else { - // Negate the left sub-tree if possible, otherwise negate the result. - NegateR = CanNegateR; - NegateAfterR = !CanNegateR; - } - NegateL = true; - NegateAfterAll = !Negate; - } else { - assert(Opcode == TargetOpcode::G_AND && - "Valid conjunction/disjunction tree"); - assert(!Negate && "Valid conjunction/disjunction tree"); - - NegateL = false; - NegateR = false; - NegateAfterR = false; - NegateAfterAll = false; - } - - // Emit sub-trees. - AArch64CC::CondCode RHSCC; - MachineInstr *CmpR = - emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB); - if (NegateAfterR) - RHSCC = AArch64CC::getInvertedCondCode(RHSCC); - MachineInstr *CmpL = emitConjunctionRec( - LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB); - if (NegateAfterAll) - OutCC = AArch64CC::getInvertedCondCode(OutCC); - return CmpL; -} - -MachineInstr *AArch64InstructionSelector::emitConjunction( - Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { - bool DummyCanNegate; - bool DummyMustBeFirst; - if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false, - *MIB.getMRI())) - return nullptr; - return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB); -} - -bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, - MachineInstr &CondMI) { - MachineRegisterInfo &MRI = *MIB.getMRI(); - AArch64CC::CondCode AArch64CC; - MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB); - if (!ConjMI) - return false; - auto CSel = - MIB.buildInstr(MRI.getType(SelI.getReg(0)).getSizeInBits() == 32 - ? 
AArch64::CSELWr - : AArch64::CSELXr, - {SelI.getReg(0)}, {SelI.getTrueReg(), SelI.getFalseReg()}) - .addImm(AArch64CC); - constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI); - SelI.eraseFromParent(); - return true; -} - -bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { +bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) { MachineRegisterInfo &MRI = *MIB.getMRI(); // We want to recognize this pattern: // @@ -5113,11 +4755,8 @@ bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { return false; unsigned CondOpc = CondDef->getOpcode(); - if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { - if (tryOptSelectConjunction(I, *CondDef)) - return true; + if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) return false; - } AArch64CC::CondCode CondCode; if (CondOpc == TargetOpcode::G_ICMP) { diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 58bf419..f81ed69b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -569,10 +569,14 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { ; ; GISEL-LABEL: select_and: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #0, ne -; GISEL-NEXT: csel x0, x2, x3, lt +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, lt +; GISEL-NEXT: mov w9, #5 +; GISEL-NEXT: cmp w9, w1 +; GISEL-NEXT: cset w9, ne +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x2, x3, ne ; GISEL-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 @@ -591,10 +595,14 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { ; ; GISEL-LABEL: select_or: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #5 -; GISEL-NEXT: cmp w8, w1 -; GISEL-NEXT: ccmp w0, w1, #8, eq -; GISEL-NEXT: csel x0, x2, x3, lt +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, lt +; GISEL-NEXT: mov w9, #5 +; GISEL-NEXT: cmp w9, w1 +; 
GISEL-NEXT: cset w9, ne +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel x0, x2, x3, ne ; GISEL-NEXT: ret %1 = icmp slt i32 %w0, %w1 %2 = icmp ne i32 5, %w1 @@ -615,13 +623,17 @@ define i64 @gccbug(i64 %x0, i64 %x1) { ; ; GISEL-LABEL: gccbug: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #2 -; GISEL-NEXT: mov w9, #4 -; GISEL-NEXT: mov w10, #1 +; GISEL-NEXT: cmp x1, #0 +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: mov w9, #2 ; GISEL-NEXT: cmp x0, #2 -; GISEL-NEXT: ccmp x0, x9, #4, ne -; GISEL-NEXT: ccmp x1, xzr, #0, eq -; GISEL-NEXT: csel x0, x8, x10, eq +; GISEL-NEXT: cset w10, eq +; GISEL-NEXT: cmp x0, #4 +; GISEL-NEXT: cset w11, eq +; GISEL-NEXT: orr w10, w11, w10 +; GISEL-NEXT: and w8, w10, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csinc x0, x9, xzr, ne ; GISEL-NEXT: ret %cmp0 = icmp eq i64 %x1, 0 %cmp1 = icmp eq i64 %x0, 2 @@ -646,13 +658,19 @@ define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) { ; ; GISEL-LABEL: select_ororand: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #13 -; GISEL-NEXT: mov w9, #2 +; GISEL-NEXT: cmp w0, #0 +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: cmp w1, #13 +; GISEL-NEXT: cset w9, hi +; GISEL-NEXT: cmp w2, #2 +; GISEL-NEXT: cset w10, lt ; GISEL-NEXT: cmp w3, #4 -; GISEL-NEXT: ccmp w2, w9, #0, gt -; GISEL-NEXT: ccmp w1, w8, #2, ge -; GISEL-NEXT: ccmp w0, wzr, #4, ls -; GISEL-NEXT: csel w0, w3, wzr, eq +; GISEL-NEXT: cset w11, gt +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w3, wzr, ne ; GISEL-NEXT: ret %c0 = icmp eq i32 %w0, 0 %c1 = icmp ugt i32 %w1, 13 @@ -676,10 +694,16 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) { ; ; GISEL-LABEL: select_andor: ; GISEL: ; %bb.0: +; GISEL-NEXT: cmp w0, w1 +; GISEL-NEXT: cset w8, eq ; GISEL-NEXT: cmp w1, w2 -; GISEL-NEXT: ccmp w0, wzr, #4, lt -; GISEL-NEXT: ccmp w0, w1, #0, eq -; GISEL-NEXT: csel w0, w0, w1, eq +; GISEL-NEXT: cset w9, ge +; GISEL-NEXT: cmp w0, #0 
+; GISEL-NEXT: cset w10, eq +; GISEL-NEXT: orr w9, w10, w9 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = icmp eq i32 %v1, %v2 %c1 = icmp sge i32 %v2, %v3 @@ -848,9 +872,14 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i ; GISEL-LABEL: select_and_olt_one: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d2, d3, #4, mi -; GISEL-NEXT: fccmp d2, d3, #1, ne -; GISEL-NEXT: csel w0, w0, w1, vc +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: cset w10, gt +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 = fcmp one double %v2, %v3 @@ -871,9 +900,14 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i ; GISEL-LABEL: select_and_one_olt: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d0, d1, #1, ne -; GISEL-NEXT: fccmp d2, d3, #0, vc -; GISEL-NEXT: csel w0, w0, w1, mi +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: cset w9, gt +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp one double %v0, %v1 %c1 = fcmp olt double %v2, %v3 @@ -894,9 +928,14 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i ; GISEL-LABEL: select_and_olt_ueq: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d2, d3, #0, mi -; GISEL-NEXT: fccmp d2, d3, #8, le -; GISEL-NEXT: csel w0, w0, w1, pl +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, eq +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 
= fcmp ueq double %v2, %v3 @@ -917,9 +956,14 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i ; GISEL-LABEL: select_and_ueq_olt: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d0, d1, #8, le -; GISEL-NEXT: fccmp d2, d3, #0, pl -; GISEL-NEXT: csel w0, w0, w1, mi +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp ueq double %v0, %v1 %c1 = fcmp olt double %v2, %v3 @@ -940,9 +984,14 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3 ; GISEL-LABEL: select_or_olt_one: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d2, d3, #0, pl -; GISEL-NEXT: fccmp d2, d3, #8, le -; GISEL-NEXT: csel w0, w0, w1, mi +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: cset w10, gt +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 = fcmp one double %v2, %v3 @@ -963,9 +1012,14 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3 ; GISEL-LABEL: select_or_one_olt: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d0, d1, #8, le -; GISEL-NEXT: fccmp d2, d3, #8, pl -; GISEL-NEXT: csel w0, w0, w1, mi +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: cset w9, gt +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp one double %v0, %v1 %c1 = fcmp olt double %v2, %v3 @@ -986,9 +1040,14 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3 ; GISEL-LABEL: select_or_olt_ueq: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp 
d0, d1 -; GISEL-NEXT: fccmp d2, d3, #4, pl -; GISEL-NEXT: fccmp d2, d3, #1, ne -; GISEL-NEXT: csel w0, w0, w1, vs +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, eq +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 = fcmp ueq double %v2, %v3 @@ -1009,9 +1068,14 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3 ; GISEL-LABEL: select_or_ueq_olt: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d0, d1, #1, ne -; GISEL-NEXT: fccmp d2, d3, #8, vc -; GISEL-NEXT: csel w0, w0, w1, mi +; GISEL-NEXT: cset w8, eq +; GISEL-NEXT: cset w9, vs +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp ueq double %v0, %v1 %c1 = fcmp olt double %v2, %v3 @@ -1033,10 +1097,17 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3 ; GISEL-LABEL: select_or_olt_ogt_ueq: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d2, d3, #0, pl -; GISEL-NEXT: fccmp d4, d5, #4, le -; GISEL-NEXT: fccmp d4, d5, #1, ne -; GISEL-NEXT: csel w0, w0, w1, vs +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, gt +; GISEL-NEXT: fcmp d4, d5 +; GISEL-NEXT: cset w10, eq +; GISEL-NEXT: cset w11, vs +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: orr w8, w10, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 = fcmp ogt double %v2, %v3 @@ -1060,10 +1131,17 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3 ; GISEL-LABEL: select_or_olt_ueq_ogt: ; GISEL: ; %bb.0: ; GISEL-NEXT: fcmp d0, d1 -; GISEL-NEXT: fccmp d2, d3, #4, pl -; GISEL-NEXT: 
fccmp d2, d3, #1, ne -; GISEL-NEXT: fccmp d4, d5, #0, vc -; GISEL-NEXT: csel w0, w0, w1, gt +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcmp d2, d3 +; GISEL-NEXT: cset w9, eq +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: fcmp d4, d5 +; GISEL-NEXT: cset w10, gt +; GISEL-NEXT: orr w8, w9, w8 +; GISEL-NEXT: orr w8, w10, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt double %v0, %v1 %c1 = fcmp ueq double %v2, %v3 @@ -1092,11 +1170,15 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 ; GISEL: ; %bb.0: ; GISEL-NEXT: fcvt s0, h0 ; GISEL-NEXT: fcvt s1, h1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcvt s3, h3 ; GISEL-NEXT: fcmp s0, s1 -; GISEL-NEXT: fccmp s2, s3, #8, mi -; GISEL-NEXT: csel w0, w0, w1, ge +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcvt s0, h2 +; GISEL-NEXT: fcvt s1, h3 +; GISEL-NEXT: fcmp s0, s1 +; GISEL-NEXT: cset w9, ge +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt half %v0, %v1 %c1 = fcmp oge half %v2, %v3 @@ -1122,12 +1204,17 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 ; GISEL: ; %bb.0: ; GISEL-NEXT: fcvt s0, h0 ; GISEL-NEXT: fcvt s1, h1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcvt s3, h3 ; GISEL-NEXT: fcmp s0, s1 -; GISEL-NEXT: fccmp s2, s3, #4, mi -; GISEL-NEXT: fccmp s2, s3, #1, ne -; GISEL-NEXT: csel w0, w0, w1, vc +; GISEL-NEXT: cset w8, mi +; GISEL-NEXT: fcvt s0, h2 +; GISEL-NEXT: fcvt s1, h3 +; GISEL-NEXT: fcmp s0, s1 +; GISEL-NEXT: cset w9, mi +; GISEL-NEXT: cset w10, gt +; GISEL-NEXT: orr w9, w9, w10 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 +; GISEL-NEXT: csel w0, w0, w1, ne ; GISEL-NEXT: ret %c0 = fcmp olt half %v0, %v1 %c1 = fcmp one half %v2, %v3 @@ -1207,11 +1294,18 @@ define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { ; ; GISEL-LABEL: deep_or: ; GISEL: ; %bb.0: -; 
GISEL-NEXT: mov w8, #15 +; GISEL-NEXT: cmp w0, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: cmp w1, #0 +; GISEL-NEXT: cset w9, ne +; GISEL-NEXT: cmp w2, #15 +; GISEL-NEXT: cset w10, eq ; GISEL-NEXT: cmp w2, #20 -; GISEL-NEXT: ccmp w2, w8, #4, ne -; GISEL-NEXT: ccmp w1, wzr, #4, eq -; GISEL-NEXT: ccmp w0, wzr, #4, ne +; GISEL-NEXT: cset w11, eq +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: and w9, w10, w9 +; GISEL-NEXT: and w8, w9, w8 +; GISEL-NEXT: tst w8, #0x1 ; GISEL-NEXT: csel w0, w4, w5, ne ; GISEL-NEXT: ret %c0 = icmp ne i32 %a0, 0 @@ -1239,11 +1333,18 @@ define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { ; ; GISEL-LABEL: deep_or1: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #15 +; GISEL-NEXT: cmp w0, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: cmp w1, #0 +; GISEL-NEXT: cset w9, ne +; GISEL-NEXT: cmp w2, #15 +; GISEL-NEXT: cset w10, eq ; GISEL-NEXT: cmp w2, #20 -; GISEL-NEXT: ccmp w2, w8, #4, ne -; GISEL-NEXT: ccmp w0, wzr, #4, eq -; GISEL-NEXT: ccmp w1, wzr, #4, ne +; GISEL-NEXT: cset w11, eq +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: and w8, w8, w10 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: tst w8, #0x1 ; GISEL-NEXT: csel w0, w4, w5, ne ; GISEL-NEXT: ret %c0 = icmp ne i32 %a0, 0 @@ -1271,11 +1372,18 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) { ; ; GISEL-LABEL: deep_or2: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #15 +; GISEL-NEXT: cmp w0, #0 +; GISEL-NEXT: cset w8, ne +; GISEL-NEXT: cmp w1, #0 +; GISEL-NEXT: cset w9, ne +; GISEL-NEXT: cmp w2, #15 +; GISEL-NEXT: cset w10, eq ; GISEL-NEXT: cmp w2, #20 -; GISEL-NEXT: ccmp w2, w8, #4, ne -; GISEL-NEXT: ccmp w1, wzr, #4, eq -; GISEL-NEXT: ccmp w0, wzr, #4, ne +; GISEL-NEXT: cset w11, eq +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w8, w8, w10 +; GISEL-NEXT: tst w8, #0x1 ; GISEL-NEXT: csel w0, w4, w5, ne ; GISEL-NEXT: ret %c0 = icmp ne i32 %a0, 0 -- 2.7.4