From: Craig Topper
Date: Wed, 5 Aug 2020 22:35:16 +0000 (-0700)
Subject: [X86] Disable copy elision in LowerMemArgument for scalarized vectors when the loc...
X-Git-Tag: llvmorg-13-init~15632
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=08b2d0a963dbbf54317a137d69f430b347d1bfae;p=platform%2Fupstream%2Fllvm.git

[X86] Disable copy elision in LowerMemArgument for scalarized vectors when the loc VT is a different size than the original element.

For example a v4f16 argument is scalarized to 4 i32 values. So the values are
spread out instead of being packed tightly like in the original vector.

Fixes PR47000.
---
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 46a3504..37097a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3203,13 +3203,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
     return DAG.getFrameIndex(FI, PtrVT);
   }
 
+  EVT ArgVT = Ins[i].ArgVT;
+
+  // If this is a vector that has been split into multiple parts, and the
+  // scalar size of the parts don't match the vector element size, then we can't
+  // elide the copy. The parts will have padding between them instead of being
+  // packed like a vector.
+  bool ScalarizedAndExtendedVector =
+      ArgVT.isVector() && !VA.getLocVT().isVector() &&
+      VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
+
   // This is an argument in memory. We might be able to perform copy elision.
   // If the argument is passed directly in memory without any extension, then we
   // can perform copy elision. Large vector types, for example, may be passed
   // indirectly by pointer.
   if (Flags.isCopyElisionCandidate() &&
-      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
-    EVT ArgVT = Ins[i].ArgVT;
+      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
+      !ScalarizedAndExtendedVector) {
     SDValue PartAddr;
     if (Ins[i].PartOffset == 0) {
       // If this is a one-part value or the first part of a multi-part value,
diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll
index e6ddf3d..083aa78 100755
--- a/llvm/test/CodeGen/X86/pr47000.ll
+++ b/llvm/test/CodeGen/X86/pr47000.ll
@@ -16,17 +16,15 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: movw 176(%esp), %dx
 ; CHECK-NEXT: movw 172(%esp), %si
-; CHECK-NEXT: movw 164(%esp), %di
-; CHECK-NEXT: movw 166(%esp), %bx
+; CHECK-NEXT: movw 168(%esp), %di
+; CHECK-NEXT: movw 164(%esp), %bx
 ; CHECK-NEXT: movw 160(%esp), %bp
 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT: movw 156(%esp), %ax
 ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw 148(%esp), %ax
-; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw 150(%esp), %ax
+; CHECK-NEXT: movw 152(%esp), %ax
 ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
-; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
+; CHECK-NEXT: movw 148(%esp), %ax
 ; CHECK-NEXT: movw %ax, 112(%esp)
 ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload
 ; CHECK-NEXT: movw %ax, 114(%esp)
@@ -35,8 +33,8 @@ define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
 ; CHECK-NEXT: movw %bp, 118(%esp)
 ; CHECK-NEXT: movw %dx, 110(%esp)
 ; CHECK-NEXT: movw %si, 108(%esp)
-; CHECK-NEXT: movw %bx, 106(%esp)
-; CHECK-NEXT: movw %di, 104(%esp)
+; CHECK-NEXT: movw %di, 106(%esp)
+; CHECK-NEXT: movw %bx, 104(%esp)
 ; CHECK-NEXT: movzwl 118(%esp), %edx
 ; CHECK-NEXT: movzwl 116(%esp), %esi
 ; CHECK-NEXT: movzwl 114(%esp), %edi
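
As a rough sketch of the layout mismatch the commit message describes (illustrative C++ only, not part of the patch): when a v4f16 argument is scalarized to four i32 parts, each half value occupies its own 4-byte stack slot, while a packed <4 x half> in memory would place the elements at 2-byte strides, so the argument memory cannot simply be reused as the vector's backing store.

// Illustrative only, not LLVM code: prints the offsets at which the four f16
// elements land when each is widened to an i32 part versus the offsets a
// packed <4 x half> load would expect. The disagreement is the condition the
// patch checks (loc VT size, 32 bits, differs from the 16-bit element size),
// and why copy elision of the argument memory is disabled in that case.
#include <cstdio>

int main() {
  const unsigned NumElts = 4;
  const unsigned EltSizeBytes = 2;  // f16 element size in the original vector
  const unsigned PartSizeBytes = 4; // i32 part size after scalarization

  for (unsigned i = 0; i < NumElts; ++i)
    std::printf("elt %u: packed offset %u, scalarized part offset %u\n", i,
                i * EltSizeBytes, i * PartSizeBytes);
  return 0;
}

Only element 0 lines up; the remaining elements sit behind padding, which is why the test above now copies the halves from their 4-byte argument slots (148/152/156 and so on) into a tightly packed local buffer instead of reading them back as a contiguous vector.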