From: Alex Richardson Date: Fri, 29 Mar 2019 12:13:56 +0000 (+0000) Subject: [SelectionDAGBuilder] Stop setting alignment to one for hidden sret values X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d1ff003fbbb36891ca7752785dec86cfd1a76139;p=platform%2Fupstream%2Fllvm.git [SelectionDAGBuilder] Stop setting alignment to one for hidden sret values We allocated a suitably aligned frame index so we know that all the values have ABI alignment. For MIPS this avoids using pair of lwl + lwr instructions instead of a single lw. I found this when compiling CHERI pure capability code where we can't use the lwl/lwr unaligned loads/stores and were falling back to a byte load + shift + or sequence. This should save a few instructions for MIPS and possibly other backends that don't have fast unaligned loads/stores. It also improves code generation for CodeGen/X86/pr34653.ll and CodeGen/WebAssembly/offset.ll since they can now use aligned loads. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D78999 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 680b33e..944aeab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1833,6 +1833,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); + Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType()); for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. 
@@ -1841,9 +1842,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. - Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + commonAlignment(BaseAlign, Offsets[i])); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -9271,6 +9274,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, @@ -9279,7 +9284,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - /* Alignment = */ 1); + HiddenSRetAlign); ReturnValues[i] = L; Chains[i] = L.getValue(1); } diff --git a/llvm/test/CodeGen/Mips/implicit-sret.ll b/llvm/test/CodeGen/Mips/implicit-sret.ll index 3cc0892..e86cec3 100644 --- a/llvm/test/CodeGen/Mips/implicit-sret.ll +++ b/llvm/test/CodeGen/Mips/implicit-sret.ll @@ -16,23 +16,13 @@ define internal void @test() unnamed_addr nounwind { ; CHECK-NEXT: daddiu $4, $sp, 8 ; CHECK-NEXT: jal implicit_sret_decl ; CHECK-NEXT: nop -; CHECK-NEXT: # implicit-def: $at_64 -; CHECK-NEXT: ldl $1, 24($sp) -; CHECK-NEXT: ldr $1, 31($sp) +; CHECK-NEXT: ld $6, 24($sp) +; CHECK-NEXT: ld $5, 16($sp) +; CHECK-NEXT: ld $7, 32($sp) +; CHECK-NEXT: lw $1, 8($sp) ; CHECK-NEXT: # implicit-def: $v0_64 -; 
CHECK-NEXT: ldl $2, 16($sp) -; CHECK-NEXT: ldr $2, 23($sp) -; CHECK-NEXT: # implicit-def: $v1_64 -; CHECK-NEXT: ldl $3, 32($sp) -; CHECK-NEXT: ldr $3, 39($sp) -; CHECK-NEXT: # implicit-def: $a1 -; CHECK-NEXT: lwl $5, 8($sp) -; CHECK-NEXT: lwr $5, 11($sp) -; CHECK-NEXT: # implicit-def: $a0_64 -; CHECK-NEXT: move $4, $5 -; CHECK-NEXT: move $5, $2 -; CHECK-NEXT: move $6, $1 -; CHECK-NEXT: move $7, $3 +; CHECK-NEXT: move $2, $1 +; CHECK-NEXT: move $4, $2 ; CHECK-NEXT: jal use_sret ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload @@ -75,15 +65,9 @@ define internal void @test2() unnamed_addr nounwind { ; CHECK-NEXT: daddiu $4, $sp, 0 ; CHECK-NEXT: jal implicit_sret_decl2 ; CHECK-NEXT: nop -; CHECK-NEXT: # implicit-def: $at -; CHECK-NEXT: lwl $1, 20($sp) -; CHECK-NEXT: lwr $1, 23($sp) -; CHECK-NEXT: # implicit-def: $v0 -; CHECK-NEXT: lwl $2, 12($sp) -; CHECK-NEXT: lwr $2, 15($sp) -; CHECK-NEXT: # implicit-def: $v1 -; CHECK-NEXT: lwl $3, 4($sp) -; CHECK-NEXT: lwr $3, 7($sp) +; CHECK-NEXT: lw $1, 20($sp) +; CHECK-NEXT: lw $2, 12($sp) +; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 ; CHECK-NEXT: # implicit-def: $a1_64 diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll index d028338..8e89f2a 100644 --- a/llvm/test/CodeGen/WebAssembly/offset.ll +++ b/llvm/test/CodeGen/WebAssembly/offset.ll @@ -645,9 +645,9 @@ define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) ; CHECK-LABEL: aggregate_return: ; CHECK: i64.const $push[[L0:[0-9]+]]=, 0{{$}} -; CHECK: i64.store 8($0):p2align=2, $pop[[L0]]{{$}} +; CHECK: i64.store 8($0), $pop[[L0]]{{$}} ; CHECK: i64.const $push[[L1:[0-9]+]]=, 0{{$}} -; CHECK: i64.store 0($0):p2align=2, $pop[[L1]]{{$}} +; CHECK: i64.store 0($0), $pop[[L1]]{{$}} define {i32,i32,i32,i32} @aggregate_return() { ret {i32,i32,i32,i32} zeroinitializer } diff --git a/llvm/test/CodeGen/X86/pr34653.ll b/llvm/test/CodeGen/X86/pr34653.ll 
index 9a0b56a..2f63ac3 100644 --- a/llvm/test/CodeGen/X86/pr34653.ll +++ b/llvm/test/CodeGen/X86/pr34653.ll @@ -15,28 +15,22 @@ define void @pr34653() { ; CHECK-NEXT: subq $1536, %rsp # imm = 0x600 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi ; CHECK-NEXT: callq test -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm1 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm2 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm3 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm4 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm5 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm6 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm7 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm8 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm9 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm10 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm11 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm12 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm13 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm14 -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm15 -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero +; CHECK-NEXT: vmovsd 
{{.*#+}} xmm12 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero ; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero ; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero ; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero @@ -60,17 +54,11 @@ define void @pr34653() { ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8