From 189e5b4ab689a854e9e67c3288810c15b8f95923 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 13 Oct 2018 17:47:20 +0000 Subject: [PATCH] [LegalizeTypes] Prevent an assertion from PromoteIntRes_BSWAP and PromoteIntRes_BITREVERSE if the shift amount is too large for the VT returned by getShiftAmountTy Summary: getShiftAmountTy for X86 returns MVT::i8. If a BSWAP or BITREVERSE is created that requires promotion and the difference between the original VT and the promoted VT is more than 255 then we won't be able to create the constant. This patch adds a check to replace the result from getShiftAmountTy with MVT::i32 if the difference won't fit. This should get legalized later when the shift is ultimately expanded since it's clearly an illegal type that we're only promoting to make it a power of 2 bit width. Alternatively we could base the decision completely on the largest shift amount the promoted VT could use. Vectors should be immune here because getShiftAmountTy always returns the incoming VT for vectors. Only the scalar shift amount can be changed by the targets. 
Reviewers: eli.friedman, RKSimon, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D53232 llvm-svn: 344460 --- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 28 +- llvm/test/CodeGen/X86/bitreverse.ll | 618 +++++++++++++++++++++ llvm/test/CodeGen/X86/bswap.ll | 150 +++++ 3 files changed, 788 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index e11a18f..064e9e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -311,6 +311,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. +static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, + const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + // If the value won't fit in the prefered type, just use something safe. It + // will be legalized when the shift is expanded. 
+ if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + ShiftVT = MVT::i32; + return ShiftVT; +} + SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); @@ -318,10 +331,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode( - ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { @@ -331,10 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode( - ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + return DAG.getNode(ISD::SRL, dl, NVT, + DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, ShiftVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll index 2e35fde..aeac9e8 100644 --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -523,3 +523,621 @@ define <2 x i16> @undef_v2i16() { %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef) ret <2 x i16> %b } + +; Make sure we don't assert during type legalization promoting a large +; bitreverse due to the need for a large shift that won't fit in the i8 returned +; from getShiftAmountTy. 
+define i528 @large_promotion(i528 %A) nounwind { +; X86-LABEL: large_promotion: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $56, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: bswapl %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ebp +; X86-NEXT: andl $-252645136, %ebx # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %ebx +; X86-NEXT: orl %ebp, %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %ebx # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %ebx +; X86-NEXT: leal (%ebx,%ebp,4), %ebx +; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000 +; X86-NEXT: andl $-1431699456, %ebx # imm = 0xAAAA0000 +; X86-NEXT: shrl %ebx +; X86-NEXT: leal (%ebx,%ebp,2), %ebx +; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-NEXT: bswapl %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ebx +; X86-NEXT: andl $-252645136, %edi # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %edi # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %edi +; X86-NEXT: leal (%edi,%ebx,4), %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %edi # imm = 0xAAAAAAAA +; X86-NEXT: shrl %edi +; X86-NEXT: leal (%edi,%ebx,2), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: bswapl %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; X86-NEXT: shll 
$4, %edi +; X86-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %esi # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %esi +; X86-NEXT: leal (%esi,%edi,4), %esi +; X86-NEXT: movl %esi, %edi +; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA +; X86-NEXT: shrl %esi +; X86-NEXT: leal (%esi,%edi,2), %ebx +; X86-NEXT: bswapl %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %esi +; X86-NEXT: andl $-252645136, %edx # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %edx +; X86-NEXT: orl %esi, %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %edx # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %edx +; X86-NEXT: leal (%edx,%esi,4), %edx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %edx # imm = 0xAAAAAAAA +; X86-NEXT: shrl %edx +; X86-NEXT: leal (%edx,%esi,2), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: bswapl %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %edx +; X86-NEXT: andl $-252645136, %ecx # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %ecx +; X86-NEXT: orl %edx, %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %ecx # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %ecx +; X86-NEXT: leal (%ecx,%edx,4), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %ecx # imm = 0xAAAAAAAA +; X86-NEXT: shrl %ecx +; X86-NEXT: leal (%ecx,%edx,2), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, 
%ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; 
X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx 
+; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: 
bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, 
%ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: bswapl %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: andl $-252645136, %eax # imm = 0xF0F0F0F0 +; X86-NEXT: shrl $4, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NEXT: andl $-858993460, %eax # imm = 0xCCCCCCCC +; X86-NEXT: shrl $2, %eax +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: leal (%eax,%ecx,2), %edx +; X86-NEXT: movl (%esp), %esi # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %esi +; X86-NEXT: shrdl $16, %ebx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrdl $16, %ecx, %ebx +; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrdl $16, %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrdl $16, %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %ecx +; X86-NEXT: movl 
%ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrdl $16, %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: shrdl $16, %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X86-NEXT: shrdl $16, %ebp, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shrdl $16, %ebx, %ebp +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: shrdl $16, %eax, %ebx +; X86-NEXT: shrdl $16, %edi, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shrdl $16, %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %edi, 60(%eax) +; X86-NEXT: movl %ecx, 56(%eax) +; X86-NEXT: movl %ebx, 52(%eax) +; X86-NEXT: movl %ebp, 48(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 44(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 40(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 36(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 32(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 28(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 24(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 20(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 4(%eax) +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: shrl $16, %edx +; X86-NEXT: movw %dx, 64(%eax) +; X86-NEXT: addl $56, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 +; +; X64-LABEL: large_promotion: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %r15 +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 +; X64-NEXT: pushq %r12 +; X64-NEXT: pushq %rbx +; X64-NEXT: movq %rdi, %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; X64-NEXT: bswapq %rbx +; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: andq %r13, %r10 +; X64-NEXT: shlq $4, %r10 +; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 +; X64-NEXT: andq %rax, %rbx +; X64-NEXT: shrq $4, %rbx +; X64-NEXT: orq %r10, %rbx +; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333 +; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: andq %r11, %r10 +; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC +; X64-NEXT: andq %r14, %rbx +; X64-NEXT: shrq $2, %rbx +; X64-NEXT: leaq (%rbx,%r10,4), %r10 +; X64-NEXT: movabsq $6148820866244280320, %rbx # imm = 0x5555000000000000 +; X64-NEXT: andq %r10, %rbx +; X64-NEXT: movabsq $-6149102341220990976, %rdi # imm = 0xAAAA000000000000 +; X64-NEXT: andq %r10, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: leaq (%rdi,%rbx,2), %rdi +; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: bswapq %rbp +; X64-NEXT: movq %rbp, %rdi +; X64-NEXT: andq %r13, %rdi +; X64-NEXT: shlq $4, %rdi +; X64-NEXT: andq %rax, %rbp +; X64-NEXT: shrq $4, %rbp +; X64-NEXT: orq %rdi, %rbp +; X64-NEXT: movq %rbp, %rdi 
+; X64-NEXT: andq %r11, %rdi +; X64-NEXT: andq %r14, %rbp +; X64-NEXT: shrq $2, %rbp +; X64-NEXT: leaq (%rbp,%rdi,4), %rbp +; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 +; X64-NEXT: movq %rbp, %r10 +; X64-NEXT: andq %rbx, %r10 +; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA +; X64-NEXT: andq %rdi, %rbp +; X64-NEXT: shrq %rbp +; X64-NEXT: leaq (%rbp,%r10,2), %rbp +; X64-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; X64-NEXT: bswapq %rbp +; X64-NEXT: movq %rbp, %r10 +; X64-NEXT: andq %r13, %r10 +; X64-NEXT: shlq $4, %r10 +; X64-NEXT: andq %rax, %rbp +; X64-NEXT: movq %rax, %r15 +; X64-NEXT: shrq $4, %rbp +; X64-NEXT: orq %r10, %rbp +; X64-NEXT: movq %rbp, %r10 +; X64-NEXT: andq %r11, %r10 +; X64-NEXT: andq %r14, %rbp +; X64-NEXT: shrq $2, %rbp +; X64-NEXT: leaq (%rbp,%r10,4), %rbp +; X64-NEXT: movq %rbp, %r10 +; X64-NEXT: andq %rbx, %r10 +; X64-NEXT: andq %rdi, %rbp +; X64-NEXT: shrq %rbp +; X64-NEXT: leaq (%rbp,%r10,2), %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: bswapq %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: andq %r13, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: movq %r15, %rdi +; X64-NEXT: andq %r15, %r10 +; X64-NEXT: shrq $4, %r10 +; X64-NEXT: orq %rax, %r10 +; X64-NEXT: movq %r10, %rax +; X64-NEXT: andq %r11, %rax +; X64-NEXT: andq %r14, %r10 +; X64-NEXT: shrq $2, %r10 +; X64-NEXT: leaq (%r10,%rax,4), %rax +; X64-NEXT: movq %rax, %r10 +; X64-NEXT: andq %rbx, %r10 +; X64-NEXT: movabsq $-6148914691236517206, %r15 # imm = 0xAAAAAAAAAAAAAAAA +; X64-NEXT: andq %r15, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: leaq (%rax,%r10,2), %r10 +; X64-NEXT: bswapq %r9 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: andq %r13, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: andq %rdi, %r9 +; X64-NEXT: shrq $4, %r9 +; X64-NEXT: orq %rax, %r9 +; X64-NEXT: movq %r9, %rax +; X64-NEXT: andq %r11, %rax +; X64-NEXT: andq %r14, %r9 +; X64-NEXT: shrq $2, %r9 +; 
X64-NEXT: leaq (%r9,%rax,4), %rax +; X64-NEXT: movq %rax, %r9 +; X64-NEXT: andq %rbx, %r9 +; X64-NEXT: andq %r15, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: leaq (%rax,%r9,2), %r9 +; X64-NEXT: bswapq %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: andq %r13, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: andq %rdi, %r8 +; X64-NEXT: shrq $4, %r8 +; X64-NEXT: orq %rax, %r8 +; X64-NEXT: movq %r8, %rax +; X64-NEXT: andq %r11, %rax +; X64-NEXT: andq %r14, %r8 +; X64-NEXT: shrq $2, %r8 +; X64-NEXT: leaq (%r8,%rax,4), %rax +; X64-NEXT: movq %rax, %r8 +; X64-NEXT: andq %rbx, %r8 +; X64-NEXT: andq %r15, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: leaq (%rax,%r8,2), %r8 +; X64-NEXT: bswapq %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: andq %r13, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: andq %rdi, %rcx +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: orq %rax, %rcx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: andq %r11, %rax +; X64-NEXT: andq %r14, %rcx +; X64-NEXT: shrq $2, %rcx +; X64-NEXT: leaq (%rcx,%rax,4), %rax +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: andq %rbx, %rcx +; X64-NEXT: andq %r15, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: leaq (%rax,%rcx,2), %rcx +; X64-NEXT: bswapq %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: andq %r13, %rax +; X64-NEXT: shlq $4, %rax +; X64-NEXT: andq %rdi, %rdx +; X64-NEXT: shrq $4, %rdx +; X64-NEXT: orq %rax, %rdx +; X64-NEXT: movq %rdx, %rax +; X64-NEXT: andq %r11, %rax +; X64-NEXT: andq %r14, %rdx +; X64-NEXT: shrq $2, %rdx +; X64-NEXT: leaq (%rdx,%rax,4), %rax +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: andq %rbx, %rdx +; X64-NEXT: andq %r15, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: leaq (%rax,%rdx,2), %rax +; X64-NEXT: bswapq %rsi +; X64-NEXT: andq %rsi, %r13 +; X64-NEXT: andq %rdi, %rsi +; X64-NEXT: shlq $4, %r13 +; X64-NEXT: shrq $4, %rsi +; X64-NEXT: orq %r13, %rsi +; X64-NEXT: andq %rsi, %r11 +; X64-NEXT: andq %r14, %rsi +; X64-NEXT: shrq $2, %rsi +; X64-NEXT: leaq (%rsi,%r11,4), %rdx +; X64-NEXT: andq %rdx, %rbx +; X64-NEXT: andq %r15, %rdx +; 
X64-NEXT: shrq %rdx +; X64-NEXT: leaq (%rdx,%rbx,2), %rdx +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; X64-NEXT: shrdq $48, %rdi, %rsi +; X64-NEXT: shrdq $48, %rbp, %rdi +; X64-NEXT: shrdq $48, %r10, %rbp +; X64-NEXT: shrdq $48, %r9, %r10 +; X64-NEXT: shrdq $48, %r8, %r9 +; X64-NEXT: shrdq $48, %rcx, %r8 +; X64-NEXT: shrdq $48, %rax, %rcx +; X64-NEXT: shrdq $48, %rdx, %rax +; X64-NEXT: movq %rax, 56(%r12) +; X64-NEXT: movq %rcx, 48(%r12) +; X64-NEXT: movq %r8, 40(%r12) +; X64-NEXT: movq %r9, 32(%r12) +; X64-NEXT: movq %r10, 24(%r12) +; X64-NEXT: movq %rbp, 16(%r12) +; X64-NEXT: movq %rdi, 8(%r12) +; X64-NEXT: movq %rsi, (%r12) +; X64-NEXT: shrq $48, %rdx +; X64-NEXT: movw %dx, 64(%r12) +; X64-NEXT: movq %r12, %rax +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %Z = call i528 @llvm.bitreverse.i528(i528 %A) + ret i528 %Z +} +declare i528 @llvm.bitreverse.i528(i528) diff --git a/llvm/test/CodeGen/X86/bswap.ll b/llvm/test/CodeGen/X86/bswap.ll index 756dd7fa..4753fc2 100644 --- a/llvm/test/CodeGen/X86/bswap.ll +++ b/llvm/test/CodeGen/X86/bswap.ll @@ -206,3 +206,153 @@ define i64 @finally_useful_bswap() { ret i64 %swapped } +; Make sure we don't assert during type legalization promoting a large +; bswap due to the need for a large shift that won't fit in the i8 returned +; from getShiftAmountTy. 
+define i528 @large_promotion(i528 %A) nounwind { +; CHECK-LABEL: large_promotion: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $44, %esp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: bswapl %ecx +; CHECK-NEXT: shrdl $16, %ecx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: bswapl %edx +; CHECK-NEXT: shrdl $16, %edx, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: bswapl %esi +; CHECK-NEXT: shrdl $16, %esi, %edx +; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: bswapl %edi +; CHECK-NEXT: shrdl $16, %edi, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: bswapl %ebx +; CHECK-NEXT: shrdl $16, %ebx, %edi +; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: bswapl %ebp +; CHECK-NEXT: shrdl $16, %ebp, %ebx +; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: bswapl %ecx +; CHECK-NEXT: shrdl $16, %ecx, %ebp +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: shrdl $16, %eax, %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: bswapl %ecx +; CHECK-NEXT: shrdl $16, %ecx, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: bswapl %eax +; CHECK-NEXT: shrdl $16, %eax, %ecx +; CHECK-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: bswapl %ebp +; CHECK-NEXT: shrdl $16, %ebp, %eax +; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: bswapl %ebx +; CHECK-NEXT: shrdl $16, %ebx, %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: bswapl %esi +; CHECK-NEXT: shrdl $16, %esi, %ebx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: bswapl %edx +; CHECK-NEXT: shrdl $16, %edx, %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: bswapl %ecx +; CHECK-NEXT: shrdl $16, %ecx, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: bswapl %edi +; CHECK-NEXT: shrdl $16, %edi, %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %ecx, 60(%eax) +; CHECK-NEXT: movl %edx, 56(%eax) +; CHECK-NEXT: movl %esi, 52(%eax) +; CHECK-NEXT: movl %ebx, 48(%eax) +; CHECK-NEXT: movl %ebp, 44(%eax) +; CHECK-NEXT: movl (%esp), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 40(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 36(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 32(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 28(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 24(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 20(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 16(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 8(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx 
# 4-byte Reload +; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: shrl $16, %edi +; CHECK-NEXT: movw %di, 64(%eax) +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl $4 +; +; CHECK64-LABEL: large_promotion: +; CHECK64: # %bb.0: +; CHECK64-NEXT: pushq %rbx +; CHECK64-NEXT: movq %rdi, %rax +; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK64-NEXT: bswapq %r10 +; CHECK64-NEXT: bswapq %rdi +; CHECK64-NEXT: shrdq $48, %rdi, %r10 +; CHECK64-NEXT: bswapq %r11 +; CHECK64-NEXT: shrdq $48, %r11, %rdi +; CHECK64-NEXT: bswapq %rbx +; CHECK64-NEXT: shrdq $48, %rbx, %r11 +; CHECK64-NEXT: bswapq %r9 +; CHECK64-NEXT: shrdq $48, %r9, %rbx +; CHECK64-NEXT: bswapq %r8 +; CHECK64-NEXT: shrdq $48, %r8, %r9 +; CHECK64-NEXT: bswapq %rcx +; CHECK64-NEXT: shrdq $48, %rcx, %r8 +; CHECK64-NEXT: bswapq %rdx +; CHECK64-NEXT: shrdq $48, %rdx, %rcx +; CHECK64-NEXT: bswapq %rsi +; CHECK64-NEXT: shrdq $48, %rsi, %rdx +; CHECK64-NEXT: shrq $48, %rsi +; CHECK64-NEXT: movq %rdx, 56(%rax) +; CHECK64-NEXT: movq %rcx, 48(%rax) +; CHECK64-NEXT: movq %r8, 40(%rax) +; CHECK64-NEXT: movq %r9, 32(%rax) +; CHECK64-NEXT: movq %rbx, 24(%rax) +; CHECK64-NEXT: movq %r11, 16(%rax) +; CHECK64-NEXT: movq %rdi, 8(%rax) +; CHECK64-NEXT: movq %r10, (%rax) +; CHECK64-NEXT: movw %si, 64(%rax) +; CHECK64-NEXT: popq %rbx +; CHECK64-NEXT: retq + %Z = call i528 @llvm.bswap.i528(i528 %A) + ret i528 %Z +} +declare i528 @llvm.bswap.i528(i528) -- 2.7.4