From f067dd839eca3103e8afc49c6e0a74d944f25fdd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 27 Oct 2019 11:49:24 -0700 Subject: [PATCH] [LegalizeTypes] When promoting BITREVERSE/BSWAP don't take the shift amount into account when determining the shift amount VT. If the target's preferred shift amount VT can't hold any shift amount for the promoted VT, we should use i32. The specific shift amount shouldn't matter. The type will be adjusted later when the shift itself is type legalized. This avoids an assert in getNode. Fixes PR43820. --- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 19 +- llvm/test/CodeGen/X86/pr43820.ll | 383 +++++++++++++++++++++ 2 files changed, 392 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr43820.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 757f391..0e193ba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -365,15 +365,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount // in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, - const TargetLowering &TLI, +static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, SelectionDAG &DAG) { EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If the value won't fit in the prefered type, just use something safe. It - // will be legalized when the shift is expanded. - if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + // If any possible shift value won't fit in the prefered type, just use + // something safe. It will be legalized when the shift is expanded. + if (!ShiftVT.isVector() && + ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) ShiftVT = MVT::i32; return ShiftVT; } @@ -385,7 +385,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -397,7 +397,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -1058,8 +1058,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. 
unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), - TLI, DAG); + EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, diff --git a/llvm/test/CodeGen/X86/pr43820.ll b/llvm/test/CodeGen/X86/pr43820.ll new file mode 100644 index 0000000..5bdf787 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr43820.ll @@ -0,0 +1,383 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define i1000 @square(i1000 %A) nounwind { +; CHECK-LABEL: square: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; CHECK-NEXT: bswapq %rbx +; CHECK-NEXT: movabsq $1085102592571150095, %rdi # imm = 0xF0F0F0F0F0F0F0F +; CHECK-NEXT: movq %rbx, %rbp +; CHECK-NEXT: andq %rdi, %rbp +; CHECK-NEXT: shlq $4, %rbp +; CHECK-NEXT: movabsq $-1085102592571150096, %r11 # imm = 0xF0F0F0F0F0F0F0F0 +; CHECK-NEXT: andq %r11, %rbx +; CHECK-NEXT: movq %r11, %rax +; CHECK-NEXT: shrq $4, %rbx +; CHECK-NEXT: orq %rbp, %rbx +; CHECK-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333 +; CHECK-NEXT: movq %rbx, %r14 +; CHECK-NEXT: andq %r11, %r14 +; CHECK-NEXT: movabsq $-3689348814741910324, %rbp # imm = 0xCCCCCCCCCCCCCCCC +; CHECK-NEXT: andq %rbp, %rbx +; CHECK-NEXT: movq %rbp, %r15 +; CHECK-NEXT: shrq $2, %rbx +; CHECK-NEXT: leaq (%rbx,%r14,4), %r14 +; CHECK-NEXT: movabsq $6148914691230924800, %rbx # imm = 0x5555555555000000 +; CHECK-NEXT: andq %r14, %rbx +; CHECK-NEXT: movabsq $-6148914691247702016, %rbp # imm = 0xAAAAAAAAAA000000 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%rbx,2), %rbx +; CHECK-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: bswapq %r10 +; CHECK-NEXT: movq %r10, %rbx +; CHECK-NEXT: andq %rdi, %rbx +; CHECK-NEXT: shlq $4, %rbx +; CHECK-NEXT: andq %rax, %r10 +; CHECK-NEXT: shrq $4, %r10 +; CHECK-NEXT: orq %rbx, %r10 +; CHECK-NEXT: movq %r10, %rbx +; CHECK-NEXT: andq %r11, %rbx +; CHECK-NEXT: andq %r15, %r10 +; CHECK-NEXT: shrq $2, %r10 +; CHECK-NEXT: leaq (%r10,%rbx,4), %rbp +; CHECK-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: movabsq $-6148914691236517206, %r13 # imm = 0xAAAAAAAAAAAAAAAA +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %rax, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %rax, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %rax, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %rax, %rbp +; CHECK-NEXT: movq %rax, %r14 +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: 
andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; CHECK-NEXT: bswapq %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: shlq $4, %r10 +; CHECK-NEXT: andq %r14, %rbp +; CHECK-NEXT: shrq $4, %rbp +; CHECK-NEXT: orq %r10, %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %r11, %r10 +; CHECK-NEXT: andq %r15, %rbp +; CHECK-NEXT: shrq $2, %rbp +; CHECK-NEXT: leaq (%rbp,%r10,4), %rbp +; CHECK-NEXT: movq %rbp, %r10 +; CHECK-NEXT: andq %rbx, %r10 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r10,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: bswapq %r9 +; CHECK-NEXT: movq %r9, %rbp +; CHECK-NEXT: andq %rdi, %rbp +; CHECK-NEXT: shlq $4, %rbp +; CHECK-NEXT: andq %r14, %r9 +; CHECK-NEXT: shrq $4, %r9 +; CHECK-NEXT: orq %rbp, %r9 +; CHECK-NEXT: movq %r9, %rbp +; CHECK-NEXT: andq %r11, %rbp +; CHECK-NEXT: andq %r15, %r9 +; CHECK-NEXT: shrq $2, %r9 +; CHECK-NEXT: leaq (%r9,%rbp,4), %rbp +; CHECK-NEXT: movq %rbp, %r9 +; CHECK-NEXT: andq %rbx, %r9 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r9,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: bswapq %r8 +; CHECK-NEXT: movq %r8, %rbp +; CHECK-NEXT: andq %rdi, %rbp +; CHECK-NEXT: shlq $4, %rbp +; CHECK-NEXT: andq %r14, %r8 +; CHECK-NEXT: shrq $4, %r8 +; CHECK-NEXT: orq %rbp, %r8 +; CHECK-NEXT: movq %r8, %rbp +; CHECK-NEXT: andq %r11, %rbp +; CHECK-NEXT: andq %r15, %r8 +; CHECK-NEXT: movq %r15, %r9 +; CHECK-NEXT: shrq $2, %r8 +; CHECK-NEXT: leaq (%r8,%rbp,4), %rbp +; CHECK-NEXT: movq %rbp, %r8 +; CHECK-NEXT: andq %rbx, %r8 +; CHECK-NEXT: andq %r13, %rbp +; CHECK-NEXT: shrq %rbp +; CHECK-NEXT: leaq (%rbp,%r8,2), %rbp +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: bswapq %rcx +; CHECK-NEXT: movq %rcx, %rbp +; CHECK-NEXT: andq %rdi, %rbp +; CHECK-NEXT: shlq $4, %rbp +; CHECK-NEXT: andq %r14, %rcx +; CHECK-NEXT: shrq $4, %rcx +; CHECK-NEXT: orq %rbp, %rcx +; CHECK-NEXT: movq %rcx, %rbp +; CHECK-NEXT: andq %r11, %rbp +; CHECK-NEXT: andq %r15, %rcx +; CHECK-NEXT: shrq $2, %rcx +; CHECK-NEXT: leaq (%rcx,%rbp,4), %rcx +; CHECK-NEXT: movq %rcx, %rbp +; CHECK-NEXT: andq %rbx, %rbp +; CHECK-NEXT: andq %r13, %rcx +; CHECK-NEXT: shrq %rcx +; CHECK-NEXT: leaq (%rcx,%rbp,2), %r15 +; CHECK-NEXT: bswapq %rdx +; CHECK-NEXT: movq %rdx, %rbp +; CHECK-NEXT: andq %rdi, %rbp +; CHECK-NEXT: shlq $4, %rbp +; CHECK-NEXT: andq %r14, %rdx +; CHECK-NEXT: shrq $4, %rdx +; CHECK-NEXT: orq %rbp, %rdx +; 
CHECK-NEXT: movq %rdx, %rbp +; CHECK-NEXT: andq %r11, %rbp +; CHECK-NEXT: andq %r9, %rdx +; CHECK-NEXT: shrq $2, %rdx +; CHECK-NEXT: leaq (%rdx,%rbp,4), %rdx +; CHECK-NEXT: movq %rdx, %rbp +; CHECK-NEXT: andq %rbx, %rbp +; CHECK-NEXT: andq %r13, %rdx +; CHECK-NEXT: shrq %rdx +; CHECK-NEXT: leaq (%rdx,%rbp,2), %rdx +; CHECK-NEXT: bswapq %rsi +; CHECK-NEXT: andq %rsi, %rdi +; CHECK-NEXT: andq %r14, %rsi +; CHECK-NEXT: shlq $4, %rdi +; CHECK-NEXT: shrq $4, %rsi +; CHECK-NEXT: orq %rdi, %rsi +; CHECK-NEXT: andq %rsi, %r11 +; CHECK-NEXT: andq %r9, %rsi +; CHECK-NEXT: shrq $2, %rsi +; CHECK-NEXT: leaq (%rsi,%r11,4), %rsi +; CHECK-NEXT: andq %rsi, %rbx +; CHECK-NEXT: andq %r13, %rsi +; CHECK-NEXT: shrq %rsi +; CHECK-NEXT: leaq (%rsi,%rbx,2), %r13 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rax, %r11 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rcx, %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rbp, %rcx +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; CHECK-NEXT: shrdq $24, %r12, %rbp +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload +; CHECK-NEXT: shrdq $24, %r14, %r12 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rbx, %r14 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; CHECK-NEXT: shrdq $24, %r10, %rbx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload +; CHECK-NEXT: shrdq $24, %r9, %r10 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; CHECK-NEXT: shrdq $24, %r8, %r9 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rdi, %r8 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rsi, %rdi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: shrdq $24, %rax, %rsi +; CHECK-NEXT: shrdq $24, %r15, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: shrdq $24, %rdx, %r15 +; CHECK-NEXT: shrdq $24, %r13, %rdx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: movq %rdx, 112(%rax) +; CHECK-NEXT: movq %r15, 104(%rax) +; CHECK-NEXT: movq %rcx, 96(%rax) +; CHECK-NEXT: movq %rsi, 88(%rax) +; CHECK-NEXT: movq %rdi, 80(%rax) +; CHECK-NEXT: movq %r8, 72(%rax) +; CHECK-NEXT: movq %r9, 64(%rax) +; CHECK-NEXT: movq %r10, 56(%rax) +; CHECK-NEXT: movq %rbx, 48(%rax) +; CHECK-NEXT: movq %r14, 40(%rax) +; CHECK-NEXT: movq %r12, 32(%rax) +; CHECK-NEXT: movq %rbp, 24(%rax) +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-NEXT: movq %rcx, 16(%rax) +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-NEXT: movq %rcx, 8(%rax) +; CHECK-NEXT: movq %r11, (%rax) +; CHECK-NEXT: movq %r13, %rcx +; CHECK-NEXT: shrq $56, %r13 +; CHECK-NEXT: movb %r13b, 124(%rax) +; CHECK-NEXT: shrq $24, %rcx +; CHECK-NEXT: movl %ecx, 120(%rax) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %Z = call i1000 @llvm.bitreverse.i1000(i1000 %A) + ret i1000 %Z +} + +declare i1000 @llvm.bitreverse.i1000(i1000) -- 2.7.4
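
For reviewers skimming the C++ hunk, here is a minimal standalone sketch of the
selection rule the patch introduces (plain C++ with illustrative names, not the
in-tree EVT/TargetLowering code): keep the target's preferred shift-amount type
only if it is wide enough to encode any shift amount for the promoted type,
i.e. at least Log2_32_Ceil(bit width) bits, and otherwise fall back to a 32-bit
type. The concrete shift amount (DiffBits) no longer participates in the
decision.

// Standalone illustration only; models types by bit width instead of EVTs.
// The in-tree check is:
//   !ShiftVT.isVector() &&
//   ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())
#include <iostream>

// Smallest number of bits needed to represent any shift amount
// (0 .. Bits-1) for a value that is `Bits` bits wide, i.e. ceil(log2(Bits)).
static unsigned bitsNeededForShiftAmount(unsigned Bits) {
  unsigned Needed = 0;
  while ((1u << Needed) < Bits)
    ++Needed;
  return Needed;
}

// Mirrors the new getShiftAmountTyForConstant: keep the target's preferred
// shift-amount width if it can hold any shift amount for the promoted type,
// otherwise fall back to 32 bits (MVT::i32 in the patch).
static unsigned chooseShiftAmountWidth(unsigned PreferredShiftBits,
                                       unsigned PromotedValueBits) {
  if (PreferredShiftBits < bitsNeededForShiftAmount(PromotedValueBits))
    return 32;
  return PreferredShiftBits;
}

int main() {
  // i1000 promotes to i1024; its shift amounts need 10 bits, which do not
  // fit an 8-bit preferred shift-amount type, so fall back to 32.
  std::cout << chooseShiftAmountWidth(8, 1024) << "\n"; // 32
  // Ordinary case: shift amounts for a 64-bit value need only 6 bits.
  std::cout << chooseShiftAmountWidth(8, 64) << "\n";   // 8
  return 0;
}

In the i1000 test above, the value is first promoted to i1024, whose shift
amounts need 10 bits; that does not fit x86's preferred i8 shift-amount type,
so the helper now falls back to i32 up front instead of hitting the getNode
assert later. The DiffBits value of 24 (1024 - 1000) shows up as the
shrdq $24 double-precision shifts in the CHECK lines.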