From f3a1fce8ae411ea7135bc48495e09538d492929e Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Sat, 25 Jul 2009 00:33:29 +0000 Subject: [PATCH] Change Thumb2 jumptable codegen to one that uses two level jumps: Before: adr r12, #LJTI3_0_0 ldr pc, [r12, +r0, lsl #2] LJTI3_0_0: .long LBB3_24 .long LBB3_30 .long LBB3_31 .long LBB3_32 After: adr r12, #LJTI3_0_0 add pc, r12, +r0, lsl #2 LJTI3_0_0: b.w LBB3_24 b.w LBB3_30 b.w LBB3_31 b.w LBB3_32 This has several advantages. 1. This will make it easier to optimize this to a TBB / TBH instruction + (smaller) table. 2. This eliminates the need for the ugly asm printer hack to force the address into thumb addresses (bit 0 is one). 3. Same codegen for pic and non-pic. 4. This eliminates the need to align the table so the constantpool island pass won't have to over-estimate the size. Based on my calculation, the latter is probably slightly faster as well since ldr pc with a shifter address is very slow. That is, it should be a win as long as the HW implementation can do a reasonable job of predicting the second branch. 
llvm-svn: 77024 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 12 ++--- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 3 -- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 10 +--- llvm/lib/Target/ARM/ARMISelLowering.cpp | 25 +++++++--- llvm/lib/Target/ARM/ARMISelLowering.h | 1 + llvm/lib/Target/ARM/ARMInstrInfo.cpp | 3 -- llvm/lib/Target/ARM/ARMInstrInfo.td | 9 ++++ llvm/lib/Target/ARM/ARMInstrThumb2.td | 22 ++------- .../Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 47 +++++++++++-------- llvm/lib/Target/ARM/README-Thumb2.txt | 4 ++ llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 3 -- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 7 +-- .../Thumb2/{pic-jtbl.ll => thumb2-jtbl.ll} | 3 ++ 13 files changed, 77 insertions(+), 72 deletions(-) rename llvm/test/CodeGen/Thumb2/{pic-jtbl.ll => thumb2-jtbl.ll} (93%) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index b03c308bd2dc..0b61d4eb3d8f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -252,9 +252,11 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // ...likewise if it ends with a branch table followed by an unconditional // branch. The branch folder can create these, and we must get rid of them for // correctness of Thumb constant islands. 
- if (((SecondLastOpc == getOpcode(ARMII::BR_JTr)) || - (SecondLastOpc == getOpcode(ARMII::BR_JTm)) || - (SecondLastOpc == getOpcode(ARMII::BR_JTadd))) && + if ((SecondLastOpc == ARM::BR_JTr || + SecondLastOpc == ARM::BR_JTm || + SecondLastOpc == ARM::BR_JTadd || + SecondLastOpc == ARM::tBR_JTr || + SecondLastOpc == ARM::t2BR_JT) && (LastOpc == getOpcode(ARMII::B))) { I = LastInst; if (AllowModify) @@ -451,9 +453,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: - case ARM::t2BR_JTr: - case ARM::t2BR_JTm: - case ARM::t2BR_JTadd: { + case ARM::t2BR_JT: { // These are jumptable branches, i.e. a branch followed by an inlined // jumptable. The size is 4 + 4 * number of entries. unsigned NumOps = TID.getNumOperands(); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 9333608d4d2d..949bac4fdc44 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -167,9 +167,6 @@ namespace ARMII { ADDrr, B, Bcc, - BR_JTr, - BR_JTm, - BR_JTadd, BX_RET, LDRrr, LDRri, diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 48db39b623a8..246552127865 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -389,10 +389,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, int UOpc = Opc; switch (Opc) { case ARM::tBR_JTr: - case ARM::t2BR_JTr: - case ARM::t2BR_JTm: - case ARM::t2BR_JTadd: - // A Thumb table jump may involve padding; for the offsets to + // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); if ((Offset+MBBSize)%4 != 0) @@ -787,10 +784,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, // Thumb1 jump tables require padding. 
They should be at the end; // following unconditional branches are removed by AnalyzeBranch. MachineInstr *ThumbJTMI = prior(MBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr || - ThumbJTMI->getOpcode() == ARM::t2BR_JTr || - ThumbJTMI->getOpcode() == ARM::t2BR_JTm || - ThumbJTMI->getOpcode() == ARM::t2BR_JTadd) { + if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { unsigned newMIOffset = GetOffsetOf(ThumbJTMI); unsigned oldMIOffset = newMIOffset - delta; if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e3c62735d886..7e2bbcdc9749 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -402,6 +402,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; @@ -1704,15 +1705,27 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + if (Subtarget->isThumb2()) { + // Thumb2 uses a two-level jump. That is, it jumps into the jump table + // which does another jump to the destination. This also makes it easier + // to translate it to TBB / TBH later. + // FIXME: This might not work if the function is extremely large. 
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Table, Index, + JTI, UId); + } + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, - Chain, Addr, NULL, 0); - Chain = Addr.getValue(1); - if (isPIC) + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + Addr = DAG.getLoad((MVT)MVT::i32, dl, Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); - return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } else { + Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index abe8ca95f5b1..10f9cea1a9dc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -40,6 +40,7 @@ namespace llvm { tCALL, // Thumb function call. BRCOND, // Conditional branch. BR_JT, // Jumptable branch. + BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. 
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 56a6b0b1d8c5..0f649d4e933b 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -70,9 +70,6 @@ getOpcode(ARMII::Op Op) const { case ARMII::ADDrr: return ARM::ADDrr; case ARMII::B: return ARM::B; case ARMII::Bcc: return ARM::Bcc; - case ARMII::BR_JTr: return ARM::BR_JTr; - case ARMII::BR_JTm: return ARM::BR_JTm; - case ARMII::BR_JTadd: return ARM::BR_JTadd; case ARMII::BX_RET: return ARM::BX_RET; case ARMII::LDRrr: return ARM::LDR; case ARMII::LDRri: return 0; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 611c42fd0e4b..b4fb8a77d864 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -33,6 +33,9 @@ def SDT_ARMBrcond : SDTypeProfile<0, 2, def SDT_ARMBrJT : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_ARMBr2JT : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; @@ -72,6 +75,9 @@ def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; +def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, + [SDNPHasChain]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; @@ -205,6 +211,9 @@ def cpinst_operand : Operand { def jtblock_operand : Operand { let PrintMethod = "printJTBlockOperand"; } +def jt2block_operand : Operand { + let PrintMethod = "printJT2BlockOperand"; +} // Local PC labels. 
def pclabel : Operand { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index bb36a33e1a10..c265d9fb8054 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -1080,24 +1080,12 @@ def t2B : T2XI<(outs), (ins brtarget:$target), "b $target", [(br bb:$target)]>; -let isNotDuplicable = 1, isIndirectBranch = 1 in { -def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - "mov pc, $target \n\t.align\t2\n$jt", - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; - -def t2BR_JTm : - T2JTI<(outs), - (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id), - "ldr pc, $target \n\t.align\t2\n$jt", - [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt, - imm:$id)]>; - -def t2BR_JTadd : +let isNotDuplicable = 1, isIndirectBranch = 1 in +def t2BR_JT : T2JTI<(outs), - (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - "add pc, $target, $idx \n\t.align\t2\n$jt", - [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>; -} // isNotDuplicate, isIndirectBranch + (ins GPR:$base, GPR:$idx, jt2block_operand:$jt, i32imm:$id), + "add pc, $base, $idx, lsl #2\n$jt", + [(ARMbr2jt GPR:$base, GPR:$idx, tjumptable:$jt, imm:$id)]>; } // isBranch, isTerminator, isBarrier // FIXME: should be able to write a pattern for ARMBrcond, but can't use diff --git a/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4d21d5cfd90f..cffbecd764bb 100644 --- a/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -160,6 +160,7 @@ namespace { void printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier); void printJTBlockOperand(const MachineInstr *MI, int OpNum); + void printJT2BlockOperand(const MachineInstr *MI, int OpNum); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char 
*ExtraCode); @@ -907,6 +908,8 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, } void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { + assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!"); + const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); @@ -922,23 +925,13 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { const std::vector &JT = MJTI->getJumpTables(); const std::vector &JTBBs = JT[JTI].MBBs; bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; - bool NeedBit0 = Subtarget->isTargetDarwin() && Subtarget->isThumb2(); SmallPtrSet JTSets; for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; - if (UseSet && JTSets.insert(MBB)) { - // FIXME: Temporary workaround for an assembler bug. The assembler isn't - // setting the bit zero to 1 even though it is a thumb address. - if (NeedBit0) { - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm() - << "_set_" << MBB->getNumber() << ",("; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << "+1)\n"; - } else - printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); - } + bool isNew = JTSets.insert(MBB); + + if (UseSet && isNew) + printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); O << JTEntryDirective << ' '; if (UseSet) @@ -952,19 +945,33 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); } else { - // FIXME: Temporary workaround for an assembler bug. The assembler isn't - // setting the bit zero to 1 even though it is a thumb address. 
- if (NeedBit0) - O << '('; printBasicBlockLabel(MBB, false, false, false); - if (NeedBit0) - O << "+1)"; } if (i != e-1) O << '\n'; } } +void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id + unsigned JTI = MO1.getIndex(); + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + const std::vector &JTBBs = JT[JTI].MBBs; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + O << "\tb.w "; + printBasicBlockLabel(MBB, false, false, false); + if (i != e-1) + O << '\n'; + } +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ diff --git a/llvm/lib/Target/ARM/README-Thumb2.txt b/llvm/lib/Target/ARM/README-Thumb2.txt index 810490b094b8..675b1d97250b 100644 --- a/llvm/lib/Target/ARM/README-Thumb2.txt +++ b/llvm/lib/Target/ARM/README-Thumb2.txt @@ -5,3 +5,7 @@ * We should model IT instructions explicitly. We should introduce them (even if if-converter is not run, the function could still contain movcc's) before PEI since passes starting from PEI may require exact code size. + +//===---------------------------------------------------------------------===// + +Make use of TBB and TBH for jumptables in small functions. 
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index c38b20dafe71..bb4efa43563f 100644 --- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -37,9 +37,6 @@ unsigned Thumb1InstrInfo::getOpcode(ARMII::Op Op) const { case ARMII::ADDrr: return ARM::tADDrr; case ARMII::B: return ARM::tB; case ARMII::Bcc: return ARM::tBcc; - case ARMII::BR_JTr: return ARM::tBR_JTr; - case ARMII::BR_JTm: return 0; - case ARMII::BR_JTadd: return 0; case ARMII::BX_RET: return ARM::tBX_RET; case ARMII::LDRrr: return ARM::tLDR; case ARMII::LDRri: return 0; diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index f1ac22197628..4d442c0d54d6 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -38,9 +38,6 @@ unsigned Thumb2InstrInfo::getOpcode(ARMII::Op Op) const { case ARMII::ADDrr: return ARM::t2ADDrr; case ARMII::B: return ARM::t2B; case ARMII::Bcc: return ARM::t2Bcc; - case ARMII::BR_JTr: return ARM::t2BR_JTr; - case ARMII::BR_JTm: return ARM::t2BR_JTm; - case ARMII::BR_JTadd: return ARM::t2BR_JTadd; case ARMII::BX_RET: return ARM::tBX_RET; case ARMII::LDRrr: return ARM::t2LDRs; case ARMII::LDRri: return ARM::t2LDRi12; @@ -64,9 +61,7 @@ Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { switch (MBB.back().getOpcode()) { case ARM::t2LDM_RET: case ARM::t2B: // Uncond branch. - case ARM::t2BR_JTr: // Jumptable branch. - case ARM::t2BR_JTm: // Jumptable branch through mem. - case ARM::t2BR_JTadd: // Jumptable branch add to pc. + case ARM::t2BR_JT: // Jumptable branch. case ARM::tBR_JTr: // Jumptable branch (16-bit version). 
case ARM::tBX_RET: case ARM::tBX_RET_vararg: diff --git a/llvm/test/CodeGen/Thumb2/pic-jtbl.ll b/llvm/test/CodeGen/Thumb2/thumb2-jtbl.ll similarity index 93% rename from llvm/test/CodeGen/Thumb2/pic-jtbl.ll rename to llvm/test/CodeGen/Thumb2/thumb2-jtbl.ll index bb1facb4004d..4836e38a696e 100644 --- a/llvm/test/CodeGen/Thumb2/pic-jtbl.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-jtbl.ll @@ -1,9 +1,12 @@ +; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin | FileCheck %s ; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | FileCheck %s define void @bar(i32 %n.u) { entry: ; CHECK: bar: ; CHECK: add pc +; CHECK: b.w LBB1_2 + switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ] bb: tail call void(...)* @foo1() -- 2.34.1