From 62ff9960d337cd24d633d7dd38b49baaee05cf26 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Tue, 5 Nov 2019 14:28:13 +0100
Subject: [PATCH] [SystemZ] Improve foldMemoryOperandImpl().

Swap the compare operands if the LHS is spilled, while updating the CC masks
of the CC users. This is relatively straightforward, since the live-in lists
for the CC register can be assumed to be correct during register allocation
(thanks to 659efa2).

Also fold a spilled operand of an LOCR/SELR into an LOC(G).

Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D67437
---
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp |  23 +-
 llvm/lib/Target/SystemZ/SystemZInstrFormats.td  |  51 +++-
 llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp    | 115 +++++++--
 llvm/lib/Target/SystemZ/SystemZInstrInfo.h      |  10 +
 llvm/lib/Target/SystemZ/SystemZInstrInfo.td     |   8 +-
 llvm/test/CodeGen/SystemZ/cond-move-10.ll       | 100 ++++++++
 llvm/test/CodeGen/SystemZ/int-cmp-56.mir        | 323 ++++++++++++++++++++++++
 7 files changed, 585 insertions(+), 45 deletions(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/cond-move-10.ll
 create mode 100644 llvm/test/CodeGen/SystemZ/int-cmp-56.mir

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 37f0041..ddced5a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2190,15 +2190,6 @@ static bool shouldSwapCmpOperands(const Comparison &C) {
   return false;
 }
 
-// Return a version of comparison CC mask CCMask in which the LT and GT
-// actions are swapped.
-static unsigned reverseCCMask(unsigned CCMask) {
-  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
-          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
-          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
-          (CCMask & SystemZ::CCMASK_CMP_UO));
-}
-
 // Check whether C tests for equality between X and Y and whether X - Y
 // or Y - X is also computed. In that case it's better to compare the
 // result of the subtraction against zero.
@@ -2234,7 +2225,7 @@ static void adjustForFNeg(Comparison &C) {
     SDNode *N = *I;
     if (N->getOpcode() == ISD::FNEG) {
       C.Op0 = SDValue(N, 0);
-      C.CCMask = reverseCCMask(C.CCMask);
+      C.CCMask = SystemZ::reverseCCMask(C.CCMask);
       return;
     }
   }
@@ -2601,7 +2592,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
 
   if (shouldSwapCmpOperands(C)) {
     std::swap(C.Op0, C.Op1);
-    C.CCMask = reverseCCMask(C.CCMask);
+    C.CCMask = SystemZ::reverseCCMask(C.CCMask);
   }
 
   adjustForTestUnderMask(DAG, DL, C);
@@ -6277,15 +6268,7 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
     return false;
 
   // Compute the effective CC mask for the new branch or select.
-  switch (CCMask) {
-  case SystemZ::CCMASK_CMP_EQ: break;
-  case SystemZ::CCMASK_CMP_NE: break;
-  case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
-  case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
-  case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
-  case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
-  default: return false;
-  }
+  CCMask = SystemZ::reverseCCMask(CCMask);
 
   // Return the updated CCReg link.
   CCReg = IPM->getOperand(0);
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index f064d33..f0c117f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2807,6 +2807,10 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
   let mayLoad = 1;
   let AccessBytes = bytes;
   let CCMaskLast = 1;
+  let OpKey = mnemonic#"r"#cls;
+  let OpType = "mem";
+  let MemKey = mnemonic#cls;
+  let MemType = "target";
 }
 
 // Like CondUnaryRSY, but used for the raw assembly form. The condition-code
@@ -3211,6 +3215,8 @@ class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = !subst("loc", "sel", mnemonic);
   let NumOpsValue = "2";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRRF, but used for the raw assembly form. The condition-code
@@ -3252,6 +3258,8 @@ class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
   let CCMaskLast = 1;
   let NumOpsKey = mnemonic;
   let NumOpsValue = "3";
+  let OpKey = mnemonic#cls1;
+  let OpType = "reg";
 }
 
 // Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
@@ -4775,6 +4783,20 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
   let hasNoSchedulingInfo = 1;
 }
 
+// Same as MemFoldPseudo but for Load On Condition with CC operands.
+class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
+                             AddressingMode mode>
+  : Pseudo<(outs cls:$R1),
+           (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
+  let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
+  let OpType = "mem";
+  let MemKey = mnemonic#cls;
+  let MemType = "pseudo";
+  let mayLoad = 1;
+  let AccessBytes = bytes;
+  let hasNoSchedulingInfo = 1;
+}
+
 // Like CompareRI, but expanded after RA depending on the choice of register.
 class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
@@ -4813,6 +4835,8 @@ class CondBinaryRRFPseudo<string mnemonic, RegisterOperand cls1,
 
 // Like CondUnaryRSY, but expanded after RA depending on the choice of
 // register.
-class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
-                         bits<5> bytes, AddressingMode mode = bdaddr20only>
+class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
+                         RegisterOperand cls, bits<5> bytes,
+                         AddressingMode mode = bdaddr20only>
   : Pseudo<(outs cls:$R1),
           (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
           [(set cls:$R1,
@@ -4854,6 +4881,10 @@ class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
   def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>;
 }
 
+multiclass CondUnaryRSYPairAndMemFold<string mnemonic, bits<16> opcode,
+                                      SDPatternOperator operator,
+                                      RegisterOperand cls, bits<5> bytes,
+                                      AddressingMode mode = bdaddr20only> {
+  defm "" : CondUnaryRSYPair<mnemonic, opcode, operator, cls, bytes, mode>;
+  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
+multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
+                                        SDPatternOperator operator,
+                                        RegisterOperand cls, bits<5> bytes,
+                                        AddressingMode mode = bdaddr20only> {
+  def "" : CondUnaryRSYPseudo<mnemonic, operator, cls, bytes, mode>;
+  def _MemFoldPseudo : MemFoldPseudo_CondMove<mnemonic, cls, bytes, mode>;
+}
+
 // Define an instruction that operates on two fixed-length blocks of memory,
 // and associated pseudo instructions for operating on blocks of any size.
 // The Sequence form uses a straight-line sequence of instructions and
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 7161afe..ee347ce 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1150,14 +1150,31 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
   // commutable, try to change <INSN>R into <INSN>.
   unsigned NumOps = MI.getNumExplicitOperands();
   int MemOpcode = SystemZ::getMemOpcode(Opcode);
+  if (MemOpcode == -1)
+    return nullptr;
+
+  // Try to swap compare operands if possible.
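+  // (A compare reg/reg only has a memory form for its second operand, so a
+  // spilled LHS can be folded only after the operands are swapped. The swap
+  // is safe only if every CC user can be rewritten with reverseCCMask(),
+  // which prepareCompareSwapOperands() checks before updating the users.)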
+  bool NeedsCommute = false;
+  if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR ||
+       MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR) &&
+      OpNum == 0 && prepareCompareSwapOperands(MI))
+    NeedsCommute = true;
+
+  bool CCOperands = false;
+  if (MI.getOpcode() == SystemZ::LOCRMux || MI.getOpcode() == SystemZ::LOCGR ||
+      MI.getOpcode() == SystemZ::SELRMux || MI.getOpcode() == SystemZ::SELGR) {
+    assert(MI.getNumOperands() == 6 && NumOps == 5 &&
+           "LOCR/SELR instruction operands corrupt?");
+    NumOps -= 2;
+    CCOperands = true;
+  }
 
   // See if this is a 3-address instruction that is convertible to 2-address
   // and suitable for folding below. Only try this with virtual registers
   // and a provided VRM (during regalloc).
-  bool NeedsCommute = false;
-  if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+  if (SystemZ::getTwoOperandOpcode(Opcode) != -1) {
     if (VRM == nullptr)
-      MemOpcode = -1;
+      return nullptr;
     else {
       assert(NumOps == 3 && "Expected two source registers.");
       Register DstReg = MI.getOperand(0).getReg();
@@ -1172,32 +1189,42 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
           DstPhys == VRM->getPhys(SrcReg))
         NeedsCommute = (OpNum == 1);
       else
-        MemOpcode = -1;
+        return nullptr;
     }
   }
 
-  if (MemOpcode >= 0) {
-    if ((OpNum == NumOps - 1) || NeedsCommute) {
-      const MCInstrDesc &MemDesc = get(MemOpcode);
-      uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
-      assert(AccessBytes != 0 && "Size of access should be known");
-      assert(AccessBytes <= Size && "Access outside the frame index");
-      uint64_t Offset = Size - AccessBytes;
-      MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
-                                        MI.getDebugLoc(), get(MemOpcode));
+  if ((OpNum == NumOps - 1) || NeedsCommute) {
+    const MCInstrDesc &MemDesc = get(MemOpcode);
+    uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
+    assert(AccessBytes != 0 && "Size of access should be known");
+    assert(AccessBytes <= Size && "Access outside the frame index");
+    uint64_t Offset = Size - AccessBytes;
+    MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
+                                      MI.getDebugLoc(), get(MemOpcode));
+    if (MI.isCompare()) {
+      assert(NumOps == 2 && "Expected 2 register operands for a compare.");
+      MIB.add(MI.getOperand(NeedsCommute ? 1 : 0));
+    }
+    else {
       MIB.add(MI.getOperand(0));
       if (NeedsCommute)
         MIB.add(MI.getOperand(2));
       else
         for (unsigned I = 1; I < OpNum; ++I)
           MIB.add(MI.getOperand(I));
-      MIB.addFrameIndex(FrameIndex).addImm(Offset);
-      if (MemDesc.TSFlags & SystemZII::HasIndex)
-        MIB.addReg(0);
-      transferDeadCC(&MI, MIB);
-      transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
-      return MIB;
     }
+    MIB.addFrameIndex(FrameIndex).addImm(Offset);
+    if (MemDesc.TSFlags & SystemZII::HasIndex)
+      MIB.addReg(0);
+    if (CCOperands) {
+      unsigned CCValid = MI.getOperand(NumOps).getImm();
+      unsigned CCMask = MI.getOperand(NumOps + 1).getImm();
+      MIB.addImm(CCValid);
+      MIB.addImm(NeedsCommute ?
+                 CCMask ^ CCValid : CCMask);
+    }
+    transferDeadCC(&MI, MIB);
+    transferMIFlag(&MI, MIB, MachineInstr::NoSWrap);
+    return MIB;
   }
 
   return nullptr;
@@ -1706,6 +1733,56 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
   return 0;
 }
 
+bool SystemZInstrInfo::
+prepareCompareSwapOperands(MachineBasicBlock::iterator const MBBI) const {
+  assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
+         MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
+         "Not a compare reg/reg.");
+
+  MachineBasicBlock *MBB = MBBI->getParent();
+  bool CCLive = true;
+  SmallVector<MachineInstr *, 4> CCUsers;
+  for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+       Itr != MBB->end(); ++Itr) {
+    if (Itr->readsRegister(SystemZ::CC)) {
+      unsigned Flags = Itr->getDesc().TSFlags;
+      if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
+        CCUsers.push_back(&*Itr);
+      else
+        return false;
+    }
+    if (Itr->definesRegister(SystemZ::CC)) {
+      CCLive = false;
+      break;
+    }
+  }
+  if (CCLive) {
+    LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
+    LiveRegs.addLiveOuts(*MBB);
+    if (LiveRegs.contains(SystemZ::CC))
+      return false;
+  }
+
+  // Update all CC users.
+  for (unsigned Idx = 0; Idx < CCUsers.size(); ++Idx) {
+    unsigned Flags = CCUsers[Idx]->getDesc().TSFlags;
+    unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+                           0 : CCUsers[Idx]->getNumExplicitOperands() - 2);
+    MachineOperand &CCMaskMO = CCUsers[Idx]->getOperand(FirstOpNum + 1);
+    unsigned NewCCMask = SystemZ::reverseCCMask(CCMaskMO.getImm());
+    CCMaskMO.setImm(NewCCMask);
+  }
+
+  return true;
+}
+
+unsigned SystemZ::reverseCCMask(unsigned CCMask) {
+  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
 unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
   if (!STI.hasLoadAndTrap())
     return 0;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 23d3d53..5654190 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -155,6 +155,10 @@ enum FusedCompareType {
 namespace SystemZ {
 int getTwoOperandOpcode(uint16_t Opcode);
 int getTargetMemOpcode(uint16_t Opcode);
+
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+unsigned reverseCCMask(unsigned CCMask);
 }
 
 class SystemZInstrInfo : public SystemZGenInstrInfo {
@@ -314,6 +318,12 @@ public:
                             SystemZII::FusedCompareType Type,
                             const MachineInstr *MI = nullptr) const;
 
+  // Try to find all CC users of the compare instruction (MBBI) and update
+  // all of them to maintain equivalent behavior after swapping the compare
+  // operands. Return false if not all users can be conclusively found and
+  // handled. The compare instruction is *not* changed.
+  bool prepareCompareSwapOperands(MachineBasicBlock::iterator MBBI) const;
+
   // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
   // operation exists, return the opcode for the latter, otherwise return 0.
   unsigned getLoadAndTrap(unsigned Opcode) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 9579dcc..fab6d26 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -492,7 +492,7 @@ let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to SELR or SELFHR or a branch-and-move sequence,
     // depending on the choice of registers.
-    def SELRMux : CondBinaryRRFaPseudo<"selrmux", GRX32, GRX32, GRX32>;
+    def SELRMux : CondBinaryRRFaPseudo<"MUXselr", GRX32, GRX32, GRX32>;
     defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
     defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
     defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
@@ -525,13 +525,13 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
   let isCommutable = 1 in {
     // Expands to LOCR or LOCFHR or a branch-and-move sequence,
    // depending on the choice of registers.
-    def LOCRMux : CondBinaryRRFPseudo<"locrmux", GRX32, GRX32>;
+    def LOCRMux : CondBinaryRRFPseudo<"MUXlocr", GRX32, GRX32>;
     defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
   }
 
   // Load on condition. Matched via DAG pattern.
   // Expands to LOC or LOCFH, depending on the choice of register.
-  def LOCMux : CondUnaryRSYPseudo<simple_load, GRX32, 4>;
+  defm LOCMux : CondUnaryRSYPseudoAndMemFold<"MUXloc", simple_load, GRX32, 4>;
   defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, simple_load, GRH32, 4>;
 
   // Store on condition. Expanded from CondStore* pseudos.
@@ -564,7 +564,7 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
 
   // Load on condition. Matched via DAG pattern.
   defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, simple_load, GR32, 4>;
-  defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, simple_load, GR64, 8>;
+  defm LOCG : CondUnaryRSYPairAndMemFold<"locg", 0xEBE2, simple_load, GR64, 8>;
 
   // Store on condition. Expanded from CondStore* pseudos.
   defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-10.ll b/llvm/test/CodeGen/SystemZ/cond-move-10.ll
new file mode 100644
index 0000000..eef9365
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-10.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
+;
+; Test that a reload of a LOCGR/SELGR operand can be folded into a LOC
+; instruction.
+
+declare i64 @foo()
+declare i32 @foo32()
+
+; Check that conditional loads of spilled values can use LOCG rather than LOCGR.
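+; The many values live across the call force a spill; the i64 select then
+; becomes a LOCGR whose spilled operand is folded into a LOCG. The folded
+; operand is the commuted one, so the condition is negated (CCMask ^ CCValid),
+; hence the "lh" (not equal) mask expected below for an "icmp eq" select.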
+define void @f0(i64 *%ptr0, i64 *%dstPtr) {
+; CHECK-LABEL: f0:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: locglh {{.*}} # 8-byte Folded Reload
+; CHECK: br %r14
+  %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+  %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+  %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+  %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+  %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+  %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+  %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+  %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+  %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
+
+  %val0 = load i64, i64 *%ptr0
+  %val1 = load i64, i64 *%ptr1
+  %val2 = load i64, i64 *%ptr2
+  %val3 = load i64, i64 *%ptr3
+  %val4 = load i64, i64 *%ptr4
+  %val5 = load i64, i64 *%ptr5
+  %val6 = load i64, i64 *%ptr6
+  %val7 = load i64, i64 *%ptr7
+  %val8 = load i64, i64 *%ptr8
+  %val9 = load i64, i64 *%ptr9
+
+  %ret = call i64 @foo()
+
+  %add0 = add i64 %ret, %val0
+  %add1 = add i64 %add0, %val1
+  %add2 = add i64 %add1, %val2
+  %add3 = add i64 %add2, %val3
+  %add4 = add i64 %add3, %val4
+  %add5 = add i64 %add4, %val5
+  %add6 = add i64 %add5, %val6
+  %add7 = add i64 %add6, %val7
+  %add8 = add i64 %add7, %val8
+
+  %cond = icmp eq i64 %add7, %add8
+  %res = select i1 %cond, i64 %add8, i64 %val9
+
+  store i64 %res, i64* %dstPtr
+  ret void
+}
+
+; Check that conditional loads of spilled values can use LOC rather than LOCR.
+define void @f1(i32 *%ptr0, i32 *%dstPtr) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, foo32@PLT
+; CHECK: loclh {{.*}} # 4-byte Folded Reload
+; CHECK: br %r14
+  %ptr1 = getelementptr i32, i32 *%ptr0, i32 2
+  %ptr2 = getelementptr i32, i32 *%ptr0, i32 4
+  %ptr3 = getelementptr i32, i32 *%ptr0, i32 6
+  %ptr4 = getelementptr i32, i32 *%ptr0, i32 8
+  %ptr5 = getelementptr i32, i32 *%ptr0, i32 10
+  %ptr6 = getelementptr i32, i32 *%ptr0, i32 12
+  %ptr7 = getelementptr i32, i32 *%ptr0, i32 14
+  %ptr8 = getelementptr i32, i32 *%ptr0, i32 16
+  %ptr9 = getelementptr i32, i32 *%ptr0, i32 18
+
+  %val0 = load i32, i32 *%ptr0
+  %val1 = load i32, i32 *%ptr1
+  %val2 = load i32, i32 *%ptr2
+  %val3 = load i32, i32 *%ptr3
+  %val4 = load i32, i32 *%ptr4
+  %val5 = load i32, i32 *%ptr5
+  %val6 = load i32, i32 *%ptr6
+  %val7 = load i32, i32 *%ptr7
+  %val8 = load i32, i32 *%ptr8
+  %val9 = load i32, i32 *%ptr9
+
+  %ret = call i32 @foo32()
+
+  %add0 = add i32 %ret, %val0
+  %add1 = add i32 %add0, %val1
+  %add2 = add i32 %add1, %val2
+  %add3 = add i32 %add2, %val3
+  %add4 = add i32 %add3, %val4
+  %add5 = add i32 %add4, %val5
+  %add6 = add i32 %add5, %val6
+  %add7 = add i32 %add6, %val7
+  %add8 = add i32 %add7, %val8
+
+  %cond = icmp eq i32 %add7, %add8
+  %res = select i1 %cond, i32 %add8, i32 %val9
+
+  store i32 %res, i32* %dstPtr
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-56.mir b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir
new file mode 100644
index 0000000..3a29e5f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-56.mir
@@ -0,0 +1,323 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass greedy %s -o - \
+# RUN:   | FileCheck %s
+#
+# Test that a reload can be folded into a compare instruction after swapping
+# operands (when the LHS register is spilled).
+
+--- |
+  declare i64 @foo()
+  define i64 @fun1(i64* %ptr0) { ret i64 0 }
+  define i64 @fun2(i64* %ptr0) { ret i64 0 }
+
+  declare i32 @foo32()
+  define i32 @fun3(i32* %ptr0) { ret i32 0 }
+  define i32 @fun4(i32* %ptr0) { ret i32 0 }
+...
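+
+# A note on the LOCGHI/LOCHIMux operands checked below: the two trailing
+# immediates are CCValid and CCMask. CCValid 14 selects CC0-CC2 (the integer
+# compare outcomes), and the mask bits are 8 = CC0 (eq), 4 = CC1 (lt) and
+# 2 = CC2 (gt). Each input uses mask 10 (eq|gt); once the compare operands
+# are swapped, the CC user must use the reversed mask 12 (eq|lt), which is
+# what the CHECK lines verify.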
+
+
+# Test CGR -> CG
+# CHECK: name: fun1
+# CHECK: CG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0)
+# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc
+---
+name: fun1
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: gr64bit }
+  - { id: 3, class: gr64bit }
+  - { id: 4, class: gr64bit }
+  - { id: 5, class: gr64bit }
+  - { id: 6, class: gr64bit }
+  - { id: 7, class: gr64bit }
+  - { id: 8, class: gr64bit }
+  - { id: 9, class: gr64bit }
+  - { id: 10, class: gr64bit }
+  - { id: 11, class: gr64bit }
+  - { id: 12, class: gr64bit }
+  - { id: 13, class: gr64bit }
+  - { id: 14, class: gr64bit }
+  - { id: 15, class: gr64bit }
+  - { id: 16, class: gr64bit }
+  - { id: 17, class: gr64bit }
+  - { id: 18, class: gr64bit }
+  - { id: 19, class: gr64bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 1
+  hasCalls: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $r2d
+
+    %0:addr64bit = COPY $r2d
+    %1:gr64bit = LG %0, 0, $noreg
+    %2:gr64bit = LG %0, 16, $noreg
+    %3:gr64bit = LG %0, 32, $noreg
+    %4:gr64bit = LG %0, 48, $noreg
+    %5:gr64bit = LG %0, 64, $noreg
+    %6:gr64bit = LG %0, 80, $noreg
+    %7:gr64bit = LG %0, 96, $noreg
+    %8:gr64bit = LG %0, 112, $noreg
+    %9:gr64bit = LG %0, 128, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
+    %10:gr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
+    CGR %10, %1, implicit-def $cc
+    %12:gr64bit = COPY %10
+    %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc
+    CGR %10, %2, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc
+    CGR %10, %3, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc
+    CGR %10, %4, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc
+    CGR %10, %5, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 4, 14, 10, implicit killed $cc
+    CGR %10, %6, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc
+    CGR %10, %7, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc
+    CGR %10, %8, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc
+    CGR %9, %10, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc
+    $r2d = COPY %12
+    Return implicit $r2d
+...
+
+
+# Test CLGR -> CLG
+# CHECK: name: fun2
+# CHECK: CLG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0)
+# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc
+---
+name: fun2
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr64bit }
+  - { id: 2, class: gr64bit }
+  - { id: 3, class: gr64bit }
+  - { id: 4, class: gr64bit }
+  - { id: 5, class: gr64bit }
+  - { id: 6, class: gr64bit }
+  - { id: 7, class: gr64bit }
+  - { id: 8, class: gr64bit }
+  - { id: 9, class: gr64bit }
+  - { id: 10, class: gr64bit }
+  - { id: 11, class: gr64bit }
+  - { id: 12, class: gr64bit }
+  - { id: 13, class: gr64bit }
+  - { id: 14, class: gr64bit }
+  - { id: 15, class: gr64bit }
+  - { id: 16, class: gr64bit }
+  - { id: 17, class: gr64bit }
+  - { id: 18, class: gr64bit }
+  - { id: 19, class: gr64bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 1
+  hasCalls: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $r2d
+
+    %0:addr64bit = COPY $r2d
+    %1:gr64bit = LG %0, 0, $noreg
+    %2:gr64bit = LG %0, 16, $noreg
+    %3:gr64bit = LG %0, 32, $noreg
+    %4:gr64bit = LG %0, 48, $noreg
+    %5:gr64bit = LG %0, 64, $noreg
+    %6:gr64bit = LG %0, 80, $noreg
+    %7:gr64bit = LG %0, 96, $noreg
+    %8:gr64bit = LG %0, 112, $noreg
+    %9:gr64bit = LG %0, 128, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
+    %10:gr64bit = COPY $r2d
+    ADJCALLSTACKUP 0, 0
+    CGR %10, %1, implicit-def $cc
+    %12:gr64bit = COPY %10
+    %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc
+    CGR %10, %2, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc
+    CGR %10, %3, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc
+    CGR %10, %4, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc
+    CGR %10, %5, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 4, 14, 10, implicit killed $cc
+    CGR %10, %6, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc
+    CGR %10, %7, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc
+    CGR %10, %8, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc
+    CLGR %9, %10, implicit-def $cc
+    %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc
+    $r2d = COPY %12
+    Return implicit $r2d
+...
+
+
+# Test CR -> C
+# CHECK: name: fun3
+# CHECK: C %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0)
+# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc
+---
+name: fun3
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr32bit }
+  - { id: 2, class: gr32bit }
+  - { id: 3, class: gr32bit }
+  - { id: 4, class: gr32bit }
+  - { id: 5, class: gr32bit }
+  - { id: 6, class: gr32bit }
+  - { id: 7, class: gr32bit }
+  - { id: 8, class: gr32bit }
+  - { id: 9, class: gr32bit }
+  - { id: 10, class: gr32bit }
+  - { id: 11, class: gr32bit }
+  - { id: 12, class: gr32bit }
+  - { id: 13, class: gr32bit }
+  - { id: 14, class: gr32bit }
+  - { id: 15, class: gr32bit }
+  - { id: 16, class: gr32bit }
+  - { id: 17, class: gr32bit }
+  - { id: 18, class: gr32bit }
+  - { id: 19, class: gr32bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 1
+  hasCalls: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $r2d
+
+    %0:addr64bit = COPY $r2d
+    %1:gr32bit = LMux %0, 0, $noreg
+    %2:gr32bit = LMux %0, 8, $noreg
+    %3:gr32bit = LMux %0, 16, $noreg
+    %4:gr32bit = LMux %0, 24, $noreg
+    %5:gr32bit = LMux %0, 32, $noreg
+    %6:gr32bit = LMux %0, 40, $noreg
+    %7:gr32bit = LMux %0, 48, $noreg
+    %8:gr32bit = LMux %0, 56, $noreg
+    %9:gr32bit = LMux %0, 64, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+    %10:gr32bit = COPY $r2l
+    ADJCALLSTACKUP 0, 0
+    CR %10, %1, implicit-def $cc
+    %12:gr32bit = COPY %10
+    %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc
+    CR %10, %2, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc
+    CR %10, %3, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc
+    CR %10, %4, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc
+    CR %10, %5, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 4, 14, 10, implicit killed $cc
+    CR %10, %6, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc
+    CR %10, %7, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc
+    CR %10, %8, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc
+    CR %9, %10, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc
+    $r2l = COPY %12
+    Return implicit $r2l
+...
+
+
+# Test CLR -> CL
+# CHECK: name: fun4
+# CHECK: CL %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0)
+# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc
+---
+name: fun4
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: addr64bit }
+  - { id: 1, class: gr32bit }
+  - { id: 2, class: gr32bit }
+  - { id: 3, class: gr32bit }
+  - { id: 4, class: gr32bit }
+  - { id: 5, class: gr32bit }
+  - { id: 6, class: gr32bit }
+  - { id: 7, class: gr32bit }
+  - { id: 8, class: gr32bit }
+  - { id: 9, class: gr32bit }
+  - { id: 10, class: gr32bit }
+  - { id: 11, class: gr32bit }
+  - { id: 12, class: gr32bit }
+  - { id: 13, class: gr32bit }
+  - { id: 14, class: gr32bit }
+  - { id: 15, class: gr32bit }
+  - { id: 16, class: gr32bit }
+  - { id: 17, class: gr32bit }
+  - { id: 18, class: gr32bit }
+  - { id: 19, class: gr32bit }
+liveins:
+  - { reg: '$r2d', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment: 1
+  hasCalls: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $r2d
+
+    %0:addr64bit = COPY $r2d
+    %1:gr32bit = LMux %0, 0, $noreg
+    %2:gr32bit = LMux %0, 8, $noreg
+    %3:gr32bit = LMux %0, 16, $noreg
+    %4:gr32bit = LMux %0, 24, $noreg
+    %5:gr32bit = LMux %0, 32, $noreg
+    %6:gr32bit = LMux %0, 40, $noreg
+    %7:gr32bit = LMux %0, 48, $noreg
+    %8:gr32bit = LMux %0, 56, $noreg
+    %9:gr32bit = LMux %0, 64, $noreg
+    ADJCALLSTACKDOWN 0, 0
+    CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l
+    %10:gr32bit = COPY $r2l
+    ADJCALLSTACKUP 0, 0
+    CR %10, %1, implicit-def $cc
+    %12:gr32bit = COPY %10
+    %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc
+    CR %10, %2, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc
+    CR %10, %3, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc
+    CR %10, %4, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc
+    CR %10, %5, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 4, 14, 10, implicit killed $cc
+    CR %10, %6, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc
+    CR %10, %7, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc
+    CR %10, %8, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc
+    CLR %9, %10, implicit-def $cc
+    %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc
+    $r2l = COPY %12
+    Return implicit $r2l
+...
-- 
2.7.4
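
For reference, the CC-mask arithmetic used throughout the patch can be checked
in isolation. The sketch below is not part of the patch; it copies the mask
constants from llvm/lib/Target/SystemZ/SystemZ.h and mirrors the patch's
SystemZ::reverseCCMask(), so the two rewrites (operand swap versus condition
negation) can be verified with plain asserts:

  #include <cassert>

  // CC mask constants, as defined in llvm/lib/Target/SystemZ/SystemZ.h.
  const unsigned CCMASK_CMP_EQ = 1 << 3;  // CC 0
  const unsigned CCMASK_CMP_LT = 1 << 2;  // CC 1
  const unsigned CCMASK_CMP_GT = 1 << 1;  // CC 2
  const unsigned CCMASK_CMP_UO = 1 << 0;  // CC 3
  const unsigned CCMASK_ICMP =
      CCMASK_CMP_EQ | CCMASK_CMP_LT | CCMASK_CMP_GT;  // CCValid == 14

  // Mirror of SystemZ::reverseCCMask() from the patch: swapping the compare
  // operands exchanges the LT and GT outcomes and leaves EQ/UO untouched.
  unsigned reverseCCMask(unsigned CCMask) {
    return ((CCMask & CCMASK_CMP_EQ) |
            (CCMask & CCMASK_CMP_GT ? CCMASK_CMP_LT : 0) |
            (CCMask & CCMASK_CMP_LT ? CCMASK_CMP_GT : 0) |
            (CCMask & CCMASK_CMP_UO));
  }

  int main() {
    // Swapping operands: "x >= y" (eq|gt, mask 10) becomes "y <= x"
    // (eq|lt, mask 12) -- the 14,10 -> 14,12 change in the MIR CHECK lines.
    assert(reverseCCMask(10) == 12);

    // Folding the commuted LOCR/SELR operand instead negates the condition
    // within the valid bits, which is what CCMask ^ CCValid computes.
    assert((CCMASK_CMP_EQ ^ CCMASK_ICMP) == (CCMASK_CMP_LT | CCMASK_CMP_GT));
    return 0;
  }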