From: Derek Schuff Date: Thu, 17 Mar 2016 17:00:29 +0000 (+0000) Subject: [WebAssembly] Stackify code emitted by eliminateFrameIndex and SP writeback X-Git-Tag: llvmorg-3.9.0-rc1~11526 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d4207ba0f6d5f110f5d0def0d1f675c6a3b05668;p=platform%2Fupstream%2Fllvm.git [WebAssembly] Stackify code emitted by eliminateFrameIndex and SP writeback Summary: MRI::eliminateFrameIndex can emit several instructions to do address calculations; these can usually be stackified. Because instructions with FI operands can have subsequent operands which may be expression trees, find the top of the leftmost tree and insert the code before it, to keep the LIFO property. Also use stackified registers when writing back the SP value to memory in the epilog; writing the result back to the SP32 physical register is unnecessary there because SP will not be used after the epilog, and using a stackified register results in better code. Differential Revision: http://reviews.llvm.org/D18234 llvm-svn: 263725 --- diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index ece8974..4e5583e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -78,19 +78,20 @@ bool WebAssemblyFrameLowering::needsSPWriteback( static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator &InsertPt, + MachineBasicBlock::iterator &InsertAddr, + MachineBasicBlock::iterator &InsertStore, DebugLoc DL) { auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); unsigned SPAddr = MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); const auto *TII = MF.getSubtarget().getInstrInfo(); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPAddr) + BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), SPAddr) .addExternalSymbol(SPSymbol); auto *MMO = new MachineMemOperand(MachinePointerInfo(), 
MachineMemOperand::MOStore, 4, 4); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), - WebAssembly::SP32) + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), + SrcReg) .addImm(0) .addReg(SPAddr) .addImm(2) // p2align @@ -108,7 +109,7 @@ void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF, *MF.getFrameInfo())) { DebugLoc DL = I->getDebugLoc(); - writeSPToMemory(WebAssembly::SP32, MF, MBB, I, DL); + writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL); } MBB.erase(I); } @@ -171,7 +172,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(WebAssembly::SP32); } if (StackSize && needsSPWriteback(MF, *MFI)) { - writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, DL); + writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL); } } @@ -192,18 +193,24 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, // Restore the stack pointer. If we had fixed-size locals, add the offset // subtracted in the prolog. + unsigned SPReg = 0; + MachineBasicBlock::iterator InsertAddr = InsertPt; if (StackSize) { unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addImm(StackSize); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), - WebAssembly::SP32) + InsertAddr = + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + // In the epilog we don't need to write the result back to the SP32 physreg + // because it won't be used again. We can use a stackified register instead. + SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) .addReg(OffsetReg); WFI->stackifyVReg(OffsetReg); + WFI->stackifyVReg(SPReg); + } else { + SPReg = hasFP(MF) ? 
WebAssembly::FP32 : WebAssembly::SP32; } - writeSPToMemory( - (!StackSize && hasFP(MF)) ? WebAssembly::FP32 : WebAssembly::SP32, MF, - MBB, InsertPt, DL); + writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 607fd6e..683b52c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -51,6 +51,51 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const { return Reserved; } +static bool isStackifiedVReg(const WebAssemblyFunctionInfo *WFI, + const MachineOperand& Op) { + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + return TargetRegisterInfo::isVirtualRegister(Reg) && + WFI->isVRegStackified(Reg); + } + return false; +} + +static bool canStackifyOperand(const MachineInstr& Inst) { + unsigned Op = Inst.getOpcode(); + return Op != TargetOpcode::PHI && + Op != TargetOpcode::INLINEASM && + Op != TargetOpcode::DBG_VALUE; +} + +// Determine if the FI sequence can be stackified, and if so, where the code can +// be inserted. If stackification is possible, returns true and adjusts II to +// point to the insertion point. +bool findInsertPt(const WebAssemblyFunctionInfo *WFI, MachineBasicBlock &MBB, + unsigned OperandNum, MachineBasicBlock::iterator &II) { + if (!canStackifyOperand(*II)) return false; + + MachineBasicBlock::iterator InsertPt(II); + int StackCount = 0; + // Operands are popped in reverse order, so any operands after FIOperand + // impose a constraint + for (unsigned i = OperandNum; i < II->getNumOperands(); i++) { + if (isStackifiedVReg(WFI, II->getOperand(i))) ++StackCount; + } + // Walk backwards, tracking stack depth. When it reaches 0 we have reached the + // top of the subtree. 
+ while (StackCount) { + if (InsertPt == MBB.begin()) return false; + --InsertPt; + for (const auto &def : InsertPt->defs()) + if (isStackifiedVReg(WFI, def)) --StackCount; + for (const auto &use : InsertPt->explicit_uses()) + if (isStackifiedVReg(WFI, use)) ++StackCount; + } + II = InsertPt; + return true; +} + void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger * /*RS*/) const { @@ -78,20 +123,34 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MI.getOperand(FIOperandNum) .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); } else { - // Otherwise create an i32.add SP, offset and make it the operand. + // Otherwise calculate the address auto &MRI = MF.getRegInfo(); const auto *TII = MF.getSubtarget().getInstrInfo(); unsigned FIRegOperand = WebAssembly::SP32; if (FrameOffset) { - FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), - FIRegOperand) + // Create i32.add SP, offset and make it the operand. We want to stackify + // this sequence, but we need to preserve the LIFO expr stack ordering + // (i.e. we can't insert our code in between MI and any operands it + // pops before FIOperand). 
+ auto *WFI = MF.getInfo<WebAssemblyFunctionInfo>(); + bool CanStackifyFI = findInsertPt(WFI, MBB, FIOperandNum, II); + + unsigned OffsetOp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + OffsetOp) .addImm(FrameOffset); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), + if (CanStackifyFI) { + WFI->stackifyVReg(OffsetOp); + FIRegOperand = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + WFI->stackifyVReg(FIRegOperand); + } else { + FIRegOperand = OffsetOp; + } + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), FIRegOperand) .addReg(WebAssembly::SP32) - .addReg(FIRegOperand); + .addReg(OffsetOp); } MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } diff --git a/llvm/test/CodeGen/WebAssembly/byval.ll b/llvm/test/CodeGen/WebAssembly/byval.ll index e3dd37e..35ec4e351 100644 --- a/llvm/test/CodeGen/WebAssembly/byval.ll +++ b/llvm/test/CodeGen/WebAssembly/byval.ll @@ -35,15 +35,15 @@ define void @byval_arg(%SmallStruct* %ptr) { ; CHECK-NEXT: i32.load $push[[L4:.+]]=, 0($0) ; CHECK-NEXT: i32.store {{.*}}=, 12([[SP]]), $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const [[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]] - ; CHECK-NEXT: call ext_byval_func@FUNCTION, [[L5]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: call ext_byval_func@FUNCTION, $pop[[ARG]] call void @ext_byval_func(%SmallStruct* byval %ptr) ; Restore the stack - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L6]] ; CHECK-NEXT: i32.const $push[[L7:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), [[SP]] + ; CHECK-NEXT: i32.const $push[[L6:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L8:.+]]=, [[SP]], $pop[[L6]] + ; CHECK-NEXT: i32.store {{.*}}=, 0($pop[[L7]]), $pop[[L8]] ; CHECK-NEXT: 
return ret void } @@ -58,9 +58,9 @@ define void @byval_arg_align8(%SmallStruct* %ptr) { ; CHECK: i32.load $push[[L4:.+]]=, 0($0):p2align=3 ; CHECK-NEXT: i32.store {{.*}}=, 8([[SP]]):p2align=3, $pop[[L4]] ; Pass a pointer to the stack slot to the function - ; CHECK-NEXT: i32.const [[L5:.+]]=, 8 - ; CHECK-NEXT: i32.add [[ARG:.+]]=, [[SP]], [[L5]] - ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, [[L5]] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 8 + ; CHECK-NEXT: i32.add $push[[ARG:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: call ext_byval_func_align8@FUNCTION, $pop[[ARG]] call void @ext_byval_func_align8(%SmallStruct* byval align 8 %ptr) ret void } diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll index 2c09784..f71903c 100644 --- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -61,8 +61,8 @@ define void @set_no(i8* %dst, i8 %src, i32 %len) { ; CHECK-LABEL: frame_index: -; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop1, $pop0{{$}} -; CHECK: i32.call $discard=, memset@FUNCTION, $1, $pop3, $pop2{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $pop12, $pop1, $pop0{{$}} +; CHECK: i32.call $discard=, memset@FUNCTION, $0, $pop3, $pop2{{$}} ; CHECK: return{{$}} define void @frame_index() { entry: diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll index 15581f9..09748e6 100644 --- a/llvm/test/CodeGen/WebAssembly/userstack.ll +++ b/llvm/test/CodeGen/WebAssembly/userstack.ll @@ -22,10 +22,10 @@ define void @alloca32() noredzone { ; CHECK: i32.const $push[[L0:.+]]=, 0 ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L0]] store i32 0, i32* %retval - ; CHECK: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, 
__stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -49,7 +49,7 @@ define void @alloca3264() { } ; CHECK-LABEL: allocarray: -; CHECK: .local i32, i32{{$}} +; CHECK: .local i32{{$}} define void @allocarray() { ; CHECK: i32.const $push[[L1:.+]]=, __stack_pointer ; CHECK-NEXT: i32.load $push[[L2:.+]]=, 0($pop[[L1]]) @@ -59,10 +59,10 @@ define void @allocarray() { ; CHECK-NEXT: i32.store $discard=, 0($pop[[L4]]), [[SP]] %r = alloca [33 x i32] + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12 - ; CHECK-NEXT: i32.const [[L5:.+]]=, 12 - ; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]] - ; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]] + ; CHECK-NEXT: i32.add $push[[L6:.+]]=, $pop[[L7]], $pop[[L4]] ; CHECK-NEXT: i32.const $push[[L9:.+]]=, 1{{$}} ; CHECK-NEXT: i32.store $push[[L10:.+]]=, 12([[SP]]), $pop[[L9]]{{$}} ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}} @@ -71,10 +71,10 @@ define void @allocarray() { %p2 = getelementptr [33 x i32], [33 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 - ; CHECK: i32.const $push[[L11:.+]]=, 144 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L11]] - ; CHECK-NEXT: i32.const $push[[L12:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), [[SP]] + ; CHECK: i32.const $push[[L12:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L11:.+]]=, 144 + ; CHECK-NEXT: i32.add $push[[L13:.+]]=, [[SP]], $pop[[L11]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L12]]), $pop[[L13]] ret void } @@ -86,18 +86,18 @@ define void @non_mem_use(i8** %addr) { %r = alloca i64 %r2 = alloca i64 ; %r is at SP+8 - ; CHECK: i32.const [[OFF:.+]]=, 8 - ; CHECK-NEXT: i32.add [[ARG1:.+]]=, [[SP]], [[OFF]] - ; CHECK-NEXT: call ext_func@FUNCTION, [[ARG1]] + ; CHECK: i32.const 
$push[[OFF:.+]]=, 8 + ; CHECK-NEXT: i32.add $push[[ARG1:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: call ext_func@FUNCTION, $pop[[ARG1]] call void @ext_func(i64* %r) ; %r2 is at SP+0, no add needed ; CHECK-NEXT: call ext_func@FUNCTION, [[SP]] call void @ext_func(i64* %r2) ; Use as a value, but in a store ; %buf is at SP+16 - ; CHECK: i32.const [[OFF:.+]]=, 16 - ; CHECK-NEXT: i32.add [[VAL:.+]]=, [[SP]], [[OFF]] - ; CHECK-NEXT: i32.store {{.*}}=, 0($0), [[VAL]] + ; CHECK: i32.const $push[[OFF:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[VAL:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: i32.store {{.*}}=, 0($0), $pop[[VAL]] %gep = getelementptr inbounds [27 x i8], [27 x i8]* %buf, i32 0, i32 0 store i8* %gep, i8** %addr ret void @@ -120,10 +120,10 @@ define void @allocarray_inbounds() { %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3 store i32 1, i32* %p2 call void @ext_func(i64* null); - ; CHECK: i32.const $push[[L5:.+]]=, 32 - ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 32 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[SP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -143,7 +143,7 @@ define void @dynamic_alloca(i32 %alloc) { ; CHECK: call ext_func_i32@FUNCTION call void @ext_func_i32(i32* %r) ; CHECK: i32.const $push[[L3:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store [[SP]]=, 0($pop[[L3]]), [[FP]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L3]]), [[FP]] ret void } @@ -183,10 +183,10 @@ define void @dynamic_static_alloca(i32 %alloc) noredzone { %r1 = alloca i32 %r = alloca i32, i32 %alloc store i32 0, i32* %r - ; CHECK: i32.const $push[[L5:.+]]=, 16 - ; CHECK-NEXT: i32.add [[SP]]=, [[FP]], $pop[[L5]] - ; CHECK-NEXT: i32.const $push[[L6:.+]]=, __stack_pointer - ; CHECK-NEXT: i32.store 
$discard=, 0($pop[[L6]]), [[SP]] + ; CHECK: i32.const $push[[L6:.+]]=, __stack_pointer + ; CHECK-NEXT: i32.const $push[[L5:.+]]=, 16 + ; CHECK-NEXT: i32.add $push[[L7:.+]]=, [[FP]], $pop[[L5]] + ; CHECK-NEXT: i32.store $discard=, 0($pop[[L6]]), $pop[[L7]] ret void } @@ -198,9 +198,9 @@ entry: ; CHECK: i32.const $push[[L1:.+]]=, 16 ; CHECK-NEXT: i32.sub [[SP:.+]]=, {{.+}}, $pop[[L1]] %addr = alloca i32 - ; CHECK: i32.const [[OFF:.+]]=, 12 - ; CHECK-NEXT: i32.add [[ADDR:.+]]=, [[SP]], [[OFF]] - ; CHECK-NEXT: copy_local [[COPY:.+]]=, [[ADDR]] + ; CHECK: i32.const $push[[OFF:.+]]=, 12 + ; CHECK-NEXT: i32.add $push[[ADDR:.+]]=, [[SP]], $pop[[OFF]] + ; CHECK-NEXT: copy_local [[COPY:.+]]=, $pop[[ADDR]] br label %body body: %a = phi i32* [%addr, %entry], [%b, %body]