From ffd57408efd4c8d455616a5ff4f623250e8580c9 Mon Sep 17 00:00:00 2001 From: Yonghong Song <yhs@fb.com> Date: Thu, 19 Dec 2019 15:21:53 -0800 Subject: [PATCH] [BPF] Enable relocation location for load/store/shifts Previous btf field relocation is always at assignment like r1 = 4 which is converted from an ld_imm64 instruction. This patch did an optimization such that relocation instruction might be load/store/shift. Specifically, the following insns may also have relocation, except BPF_MOV: LDB, LDH, LDW, LDD, STB, STH, STW, STD, LDB32, LDH32, LDW32, STB32, STH32, STW32, SLL, SRL, SRA To accomplish this, a few BPF target specific codegen only instructions are invented. They are generated at backend BPF SimplifyPatchable phase, which is at early llc phase when SSA form is available. The new codegen only instructions will be converted to real proper instructions at the codegen and BTF emission stage. Note that, as revealed by a few tests, this optimization might actually be generating more relocations: Scenario 1: if (...) { ... __builtin_preserve_field_info(arg->b2, 0) ... } else { ... __builtin_preserve_field_info(arg->b2, 0) ... } Compiler could do CSE to only have one relocation. But if both of the above are translated into codegen internal instructions, the compiler will not be able to do that. Scenario 2: offset = ... __builtin_preserve_field_info(arg->b2, 0) ... ... ... offset ... ... offset ... ... offset ... For whatever reason, the compiler might temporarily do copy propagation of the right-hand side of the "offset" assignment like ... __builtin_preserve_field_info(arg->b2, 0) ... ... __builtin_preserve_field_info(arg->b2, 0) ... and CSE will be able to deduplicate later. But if these intrinsics are converted to BPF pseudo instructions, they will not be able to get deduplicated. I do not expect we will have a big instruction count difference. It may actually reduce instruction count since now relocation is in deeper insn dependency chain. 
For example, for test offset-reloc-fieldinfo-2.ll, this patch generates 7 instead of 6 relocations for non-alu32 mode, but it actually reduced instruction count from 29 to 26. Differential Revision: https://reviews.llvm.org/D71790 --- llvm/lib/Target/BPF/BPFInstrInfo.td | 19 +++ llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp | 168 +++++++++++++++++++-- llvm/lib/Target/BPF/BTFDebug.cpp | 74 +++++---- llvm/lib/Target/BPF/BTFDebug.h | 12 +- .../test/CodeGen/BPF/CORE/offset-reloc-end-load.ll | 6 +- .../CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll | 11 +- .../CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll | 26 +++- 7 files changed, 251 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index ae5a82a..0f39294 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -437,6 +437,25 @@ class LOAD Pattern> class LOADi64 : LOAD; +let isCodeGenOnly = 1 in { + def CORE_MEM : TYPE_LD_ST; + def CORE_ALU32_MEM : TYPE_LD_ST; + let Constraints = "$dst = $src" in { + def CORE_SHIFT : ALU_RR; + } +} let Predicates = [BPFNoALU32] in { def LDW : LOADi64; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index d21b977..5310f0f 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -53,6 +53,19 @@ private: void initialize(MachineFunction &MFParm); bool removeLD(void); + void processCandidate(MachineRegisterInfo *MRI, MachineBasicBlock &MBB, + MachineInstr &MI, Register &SrcReg, Register &DstReg, + const GlobalValue *GVal); + void processDstReg(MachineRegisterInfo *MRI, Register &DstReg, + Register &SrcReg, const GlobalValue *GVal, + bool doSrcRegProp); + void processInst(MachineRegisterInfo *MRI, MachineInstr *Inst, + MachineOperand *RelocOp, const GlobalValue *GVal); + void checkADDrr(MachineRegisterInfo *MRI, MachineOperand *RelocOp, + const GlobalValue *GVal); + void 
checkShift(MachineRegisterInfo *MRI, MachineBasicBlock &MBB, + MachineOperand *RelocOp, const GlobalValue *GVal, + unsigned Opcode); public: // Main entry point for this pass. @@ -71,6 +84,146 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) { LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n"); } +void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, + MachineOperand *RelocOp, const GlobalValue *GVal) { + const MachineInstr *Inst = RelocOp->getParent(); + const MachineOperand *Op1 = &Inst->getOperand(1); + const MachineOperand *Op2 = &Inst->getOperand(2); + const MachineOperand *BaseOp = (RelocOp == Op1) ? Op2 : Op1; + + // Go through all uses of %1 as in %1 = ADD_rr %2, %3 + const MachineOperand Op0 = Inst->getOperand(0); + auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + // The candidate needs to have a unique definition. + if (!MRI->getUniqueVRegDef(I->getReg())) + continue; + + MachineInstr *DefInst = I->getParent(); + unsigned Opcode = DefInst->getOpcode(); + unsigned COREOp; + if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW || + Opcode == BPF::LDD || Opcode == BPF::STB || Opcode == BPF::STH || + Opcode == BPF::STW || Opcode == BPF::STD) + COREOp = BPF::CORE_MEM; + else if (Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || + Opcode == BPF::LDW32 || Opcode == BPF::STB32 || + Opcode == BPF::STH32 || Opcode == BPF::STW32) + COREOp = BPF::CORE_ALU32_MEM; + else + continue; + + // It must be a form of %1 = *(type *)(%2 + 0) or *(type *)(%2 + 0) = %1. 
+ const MachineOperand &ImmOp = DefInst->getOperand(2); + if (!ImmOp.isImm() || ImmOp.getImm() != 0) + continue; + + BuildMI(*DefInst->getParent(), *DefInst, DefInst->getDebugLoc(), TII->get(COREOp)) + .add(DefInst->getOperand(0)).addImm(Opcode).add(*BaseOp) + .addGlobalAddress(GVal); + DefInst->eraseFromParent(); + } +} + +void BPFMISimplifyPatchable::checkShift(MachineRegisterInfo *MRI, + MachineBasicBlock &MBB, MachineOperand *RelocOp, const GlobalValue *GVal, + unsigned Opcode) { + // Relocation operand should be the operand #2. + MachineInstr *Inst = RelocOp->getParent(); + if (RelocOp != &Inst->getOperand(2)) + return; + + BuildMI(MBB, *Inst, Inst->getDebugLoc(), TII->get(BPF::CORE_SHIFT)) + .add(Inst->getOperand(0)).addImm(Opcode) + .add(Inst->getOperand(1)).addGlobalAddress(GVal); + Inst->eraseFromParent(); +} + +void BPFMISimplifyPatchable::processCandidate(MachineRegisterInfo *MRI, + MachineBasicBlock &MBB, MachineInstr &MI, Register &SrcReg, + Register &DstReg, const GlobalValue *GVal) { + if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) { + // We can optimize such a pattern: + // %1:gpr = LD_imm64 @"llvm.s:0:4$0:2" + // %2:gpr32 = LDW32 %1:gpr, 0 + // %3:gpr = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32 + // %4:gpr = ADD_rr %0:gpr, %3:gpr + // or similar patterns below for non-alu32 case. 
+ auto Begin = MRI->use_begin(DstReg), End = MRI->use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (!MRI->getUniqueVRegDef(I->getReg())) + continue; + + unsigned Opcode = I->getParent()->getOpcode(); + if (Opcode == BPF::SUBREG_TO_REG) { + Register TmpReg = I->getParent()->getOperand(0).getReg(); + processDstReg(MRI, TmpReg, DstReg, GVal, false); + } + } + + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::COPY), DstReg) + .addReg(SrcReg, 0, BPF::sub_32); + return; + } + + // All uses of DstReg replaced by SrcReg + processDstReg(MRI, DstReg, SrcReg, GVal, true); +} + +void BPFMISimplifyPatchable::processDstReg(MachineRegisterInfo *MRI, + Register &DstReg, Register &SrcReg, const GlobalValue *GVal, + bool doSrcRegProp) { + auto Begin = MRI->use_begin(DstReg), End = MRI->use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (doSrcRegProp) + I->setReg(SrcReg); + + // The candidate needs to have a unique definition. + if (MRI->getUniqueVRegDef(I->getReg())) + processInst(MRI, I->getParent(), &*I, GVal); + } +} + +// Check to see whether we could do some optimization +// to attach relocation to downstream dependent instructions. +// Two kinds of patterns are recognized below: +// Pattern 1: +// %1 = LD_imm64 @"llvm.b:0:4$0:1" <== patch_imm = 4 +// %2 = LDD %1, 0 <== this insn will be removed +// %3 = ADD_rr %0, %2 +// %4 = LDW[32] %3, 0 OR STW[32] %4, %3, 0 +// The `%4 = ...` will be transformed to +// CORE_[ALU32_]MEM(%4, mem_opcode, %0, @"llvm.b:0:4$0:1") +// and later on, BTF emit phase will translate to +// %4 = LDW[32] %0, 4 STW[32] %4, %0, 4 +// and attach a relocation to it. 
+// Pattern 2: +// %15 = LD_imm64 @"llvm.t:5:63$0:2" <== relocation type 5 +// %16 = LDD %15, 0 <== this insn will be removed +// %17 = SRA_rr %14, %16 +// The `%17 = ...` will be transformed to +// %17 = CORE_SHIFT(SRA_ri, %14, @"llvm.t:5:63$0:2") +// and later on, BTF emit phase will translate to +// %r4 = SRA_ri %r4, 63 +void BPFMISimplifyPatchable::processInst(MachineRegisterInfo *MRI, + MachineInstr *Inst, MachineOperand *RelocOp, const GlobalValue *GVal) { + unsigned Opcode = Inst->getOpcode(); + if (Opcode == BPF::ADD_rr) + checkADDrr(MRI, RelocOp, GVal); + else if (Opcode == BPF::SLL_rr) + checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SLL_ri); + else if (Opcode == BPF::SRA_rr) + checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRA_ri); + else if (Opcode == BPF::SRL_rr) + checkShift(MRI, *Inst->getParent(), RelocOp, GVal, BPF::SRL_ri); +} + /// Remove unneeded Load instructions. bool BPFMISimplifyPatchable::removeLD() { MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -105,10 +258,11 @@ bool BPFMISimplifyPatchable::removeLD() { continue; bool IsCandidate = false; + const GlobalValue *GVal = nullptr; if (DefInst->getOpcode() == BPF::LD_imm64) { const MachineOperand &MO = DefInst->getOperand(1); if (MO.isGlobal()) { - const GlobalValue *GVal = MO.getGlobal(); + GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); if (GVar) { // Global variables representing structure offset or @@ -124,17 +278,7 @@ bool BPFMISimplifyPatchable::removeLD() { if (!IsCandidate) continue; - if (MRI->getRegClass(DstReg) == &BPF::GPR32RegClass) { - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::COPY), DstReg) - .addReg(SrcReg, 0, BPF::sub_32); - } else { - auto Begin = MRI->use_begin(DstReg), End = MRI->use_end(); - decltype(End) NextI; - for (auto I = Begin; I != End; I = NextI) { - NextI = std::next(I); - I->setReg(SrcReg); - } - } + processCandidate(MRI, MBB, MI, SrcReg, DstReg, GVal); ToErase = &MI; Changed = true; diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp 
b/llvm/lib/Target/BPF/BTFDebug.cpp index bdc7ce7..86e625b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -937,9 +937,8 @@ unsigned BTFDebug::populateStructType(const DIType *Ty) { } /// Generate a struct member field relocation. -void BTFDebug::generateFieldReloc(const MachineInstr *MI, - const MCSymbol *ORSym, DIType *RootTy, - StringRef AccessPattern) { +void BTFDebug::generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy, + StringRef AccessPattern) { unsigned RootId = populateStructType(RootTy); size_t FirstDollar = AccessPattern.find_first_of('$'); size_t FirstColon = AccessPattern.find_first_of(':'); @@ -959,33 +958,8 @@ void BTFDebug::generateFieldReloc(const MachineInstr *MI, FieldRelocTable[SecNameOff].push_back(FieldReloc); } -void BTFDebug::processLDimm64(const MachineInstr *MI) { - // If the insn is an LD_imm64, the following two cases - // will generate an .BTF.ext record. - // - // If the insn is "r2 = LD_imm64 @__BTF_...", - // add this insn into the .BTF.ext FieldReloc subsection. - // Relocation looks like: - // . SecName: - // . InstOffset - // . TypeID - // . OffSetNameOff - // Later, the insn is replaced with "r2 = " - // where "" equals to the offset based on current - // type definitions. - // - // If the insn is "r2 = LD_imm64 @VAR" and VAR is - // a patchable external global, add this insn into the .BTF.ext - // ExternReloc subsection. - // Relocation looks like: - // . SecName: - // . InstOffset - // . ExternNameOff - // Later, the insn is replaced with "r2 = " or - // "LD_imm64 r2, " where "" = 0. 
- +void BTFDebug::processReloc(const MachineOperand &MO) { // check whether this is a candidate or not - const MachineOperand &MO = MI->getOperand(1); if (MO.isGlobal()) { const GlobalValue *GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); @@ -995,7 +969,7 @@ void BTFDebug::processLDimm64(const MachineInstr *MI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); - generateFieldReloc(MI, ORSym, Ty, GVar->getName()); + generateFieldReloc(ORSym, Ty, GVar->getName()); } } } @@ -1020,8 +994,25 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) { return; } - if (MI->getOpcode() == BPF::LD_imm64) - processLDimm64(MI); + if (MI->getOpcode() == BPF::LD_imm64) { + // If the insn is "r2 = LD_imm64 @", + // add this insn into the .BTF.ext FieldReloc subsection. + // Relocation looks like: + // . SecName: + // . InstOffset + // . TypeID + // . OffSetNameOff + // . RelocType + // Later, the insn is replaced with "r2 = " + // where "" equals to the offset based on current + // type definitions. + processReloc(MI->getOperand(1)); + } else if (MI->getOpcode() == BPF::CORE_MEM || + MI->getOpcode() == BPF::CORE_ALU32_MEM || + MI->getOpcode() == BPF::CORE_SHIFT) { + // relocation insn is a load, store or shift insn. + processReloc(MI->getOperand(3)); + } // Skip this instruction if no DebugLoc or the DebugLoc // is the same as the previous instruction. 
@@ -1148,6 +1139,25 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { return true; } } + } else if (MI->getOpcode() == BPF::CORE_MEM || + MI->getOpcode() == BPF::CORE_ALU32_MEM || + MI->getOpcode() == BPF::CORE_SHIFT) { + const MachineOperand &MO = MI->getOperand(3); + if (MO.isGlobal()) { + const GlobalValue *GVal = MO.getGlobal(); + auto *GVar = dyn_cast(GVal); + if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { + uint32_t Imm = PatchImms[GVar->getName().str()]; + OutMI.setOpcode(MI->getOperand(1).getImm()); + if (MI->getOperand(0).isImm()) + OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm())); + else + OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + OutMI.addOperand(MCOperand::createReg(MI->getOperand(2).getReg())); + OutMI.addOperand(MCOperand::createImm(Imm)); + return true; + } + } } return false; } diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index c0d3f36..da23ef0 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -223,7 +223,7 @@ struct BTFLineInfo { uint32_t ColumnNum; ///< the column number }; -/// Represent one offset relocation. +/// Represent one field relocation. struct BTFFieldReloc { const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc uint32_t TypeID; ///< Type ID @@ -296,15 +296,15 @@ class BTFDebug : public DebugHandlerBase { /// Generate types for function prototypes. void processFuncPrototypes(); - /// Generate one offset relocation record. - void generateFieldReloc(const MachineInstr *MI, const MCSymbol *ORSym, - DIType *RootTy, StringRef AccessPattern); + /// Generate one field relocation record. + void generateFieldReloc(const MCSymbol *ORSym, DIType *RootTy, + StringRef AccessPattern); /// Populating unprocessed struct type. unsigned populateStructType(const DIType *Ty); - /// Process LD_imm64 instructions. - void processLDimm64(const MachineInstr *MI); + /// Process relocation instructions. 
+ void processReloc(const MachineOperand &MO); /// Emit common header of .BTF and .BTF.ext sections. void emitCommonHeader(); diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll index 872f69e..a0dd6c8 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-end-load.ll @@ -20,10 +20,8 @@ entry: } ; CHECK-LABEL: test -; CHECK: r2 = 4 -; CHECK: r1 += r2 -; CHECK-ALU64: r0 = *(u32 *)(r1 + 0) -; CHECK-ALU32: w0 = *(u32 *)(r1 + 0) +; CHECK-ALU64: r0 = *(u32 *)(r1 + 4) +; CHECK-ALU32: w0 = *(u32 *)(r1 + 4) ; CHECK: exit ; ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll index aa60d0c..d7e48d3 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll +++ b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-1.ll @@ -73,8 +73,9 @@ entry: ; CHECK: r{{[0-9]+}} = 4 ; CHECK: r{{[0-9]+}} = 4 -; CHECK: r{{[0-9]+}} = 51 -; CHECK: r{{[0-9]+}} = 60 +; CHECK: r{{[0-9]+}} <<= 51 +; CHECK: r{{[0-9]+}} s>>= 60 +; CHECK: r{{[0-9]+}} >>= 60 ; CHECK: r{{[0-9]+}} = 1 ; CHECK: .byte 115 # string offset=1 @@ -83,7 +84,7 @@ entry: ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 30 # Field reloc section string offset=30 -; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 6 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 73 @@ -103,6 +104,10 @@ entry: ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 73 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 73 ; CHECK-NEXT: .long 3 ; Function Attrs: argmemonly nounwind willreturn diff --git a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll index 929afd7..01af9d8 100644 --- a/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll +++ 
b/llvm/test/CodeGen/BPF/CORE/offset-reloc-fieldinfo-2.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL %s -; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB %s -; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL %s -; RUN: llc -march=bpfeb -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB %s +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK64 %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK64 %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK32 %s +; RUN: llc -march=bpfeb -mattr=+alu32 -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK32 %s ; Source code: ; struct s { ; int a; @@ -114,9 +114,10 @@ sw.epilog: ; preds = %entry, %sw.bb9, %sw ; CHECK: r{{[0-9]+}} = 4 ; CHECK: r{{[0-9]+}} = 4 -; CHECK-EL: r{{[0-9]+}} = 51 -; CHECK-EB: r{{[0-9]+}} = 41 -; CHECK: r{{[0-9]+}} = 60 +; CHECK-EL: r{{[0-9]+}} <<= 51 +; CHECK-EB: r{{[0-9]+}} <<= 41 +; CHECK: r{{[0-9]+}} s>>= 60 +; CHECK: r{{[0-9]+}} >>= 60 ; CHECK: r{{[0-9]+}} = 1 ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) @@ -126,7 +127,8 @@ sw.epilog: ; preds = %entry, %sw.bb9, %sw ; CHECK: .long 16 # FieldReloc ; CHECK-NEXT: .long 30 # Field reloc section string offset=30 -; CHECK-NEXT: .long 5 +; CHECK32: .long 6 +; CHECK64: .long 7 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 @@ -135,6 +137,10 @@ sw.epilog: ; preds = %entry, %sw.bb9, %sw ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 ; CHECK-NEXT: .long 1 +; CHECK64: .long .Ltmp{{[0-9]+}} +; CHECK64: .long 2 +; CHECK64: .long 36 +; CHECK64: .long 0 ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 @@ -146,6 +152,10 @@ sw.epilog: ; preds = %entry, %sw.bb9, 
%sw ; CHECK-NEXT: .long .Ltmp{{[0-9]+}} ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 36 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 36 ; CHECK-NEXT: .long 3 ; Function Attrs: nounwind readnone -- 2.7.4