From 005173cbb609f79adc2018e378bc6897cf84b06d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 22 Jan 2023 00:12:27 +0300 Subject: [PATCH] [X86] `X86TargetLowering`: override `allowsMemoryAccess()` The baseline `allowsMemoryAccess()` is wrong for X86. It assumes that aligned memory operations are always allowed, but that is not true. For example, without AVX2 we cannot perform a 32-byte aligned non-temporal load of a 32-byte vector, yet `allowsMemoryAccess()` will say it is allowed, so we may end up merging non-temporal loads, only to split them back up during legalization, and then merge them again, endlessly. NOTE: the test changes here are incidental. The main effect is that without this change, in D141777, we'd get stuck endlessly merging and splitting non-temporal stores. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D141776 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 74 ++++-- llvm/lib/Target/X86/X86ISelLowering.h | 19 ++ llvm/test/CodeGen/X86/add-sub-bool.ll | 8 +- llvm/test/CodeGen/X86/bswap-wide-int.ll | 4 +- llvm/test/CodeGen/X86/fshl.ll | 68 +++--- llvm/test/CodeGen/X86/fshr.ll | 51 ++-- llvm/test/CodeGen/X86/i128-add.ll | 16 +- llvm/test/CodeGen/X86/icmp-shift-opt.ll | 28 +-- llvm/test/CodeGen/X86/legalize-shl-vec.ll | 128 +++++----- .../CodeGen/X86/merge-consecutive-stores-nt.ll | 8 +- llvm/test/CodeGen/X86/setcc-wide-types.ll | 262 +++++++++------------ llvm/test/CodeGen/X86/smin.ll | 26 +- llvm/test/CodeGen/X86/smul-with-overflow.ll | 4 +- .../CodeGen/X86/smulo-128-legalisation-lowering.ll | 20 +- llvm/test/CodeGen/X86/umin.ll | 26 +- llvm/test/CodeGen/X86/umul-with-overflow.ll | 4 +- llvm/test/CodeGen/X86/wide-integer-cmp.ll | 2 +- llvm/test/CodeGen/X86/xaluo128.ll | 24 +- 18 files changed, 404 insertions(+), 368 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f999e27..8ffc494 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2730,24 +2730,30 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const { return true; } +static bool isBitAligned(Align Alignment, uint64_t SizeInBits) { + return (8 * Alignment.value()) % SizeInBits == 0; +} + +bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const { + if (isBitAligned(Alignment, VT.getSizeInBits())) + return true; + switch (VT.getSizeInBits()) { + default: + // 8-byte and under are always assumed to be fast. + return true; + case 128: + return !Subtarget.isUnalignedMem16Slow(); + case 256: + return !Subtarget.isUnalignedMem32Slow(); + // TODO: What about AVX-512 (512-bit) accesses? + } +} + bool X86TargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { - if (Fast) { - switch (VT.getSizeInBits()) { - default: - // 8-byte and under are always assumed to be fast. - *Fast = 1; - break; - case 128: - *Fast = !Subtarget.isUnalignedMem16Slow(); - break; - case 256: - *Fast = !Subtarget.isUnalignedMem32Slow(); - break; - // TODO: What about AVX-512 (512-bit) accesses? - } - } + if (Fast) + *Fast = isMemoryAccessFast(VT, Alignment); // NonTemporal vector memory ops must be aligned. 
if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { // NT loads can only be vector aligned, so if its less aligned than the @@ -2762,6 +2768,44 @@ bool X86TargetLowering::allowsMisalignedMemoryAccesses( return true; } +bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, Align Alignment, + MachineMemOperand::Flags Flags, + unsigned *Fast) const { + if (Fast) + *Fast = isMemoryAccessFast(VT, Alignment); + if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { + if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, + /*Fast=*/nullptr)) + return true; + // NonTemporal vector memory ops are special, and must be aligned. + if (!isBitAligned(Alignment, VT.getSizeInBits())) + return false; + switch (VT.getSizeInBits()) { + case 128: + if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41()) + return true; + if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2()) + return true; + return false; + case 256: + if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2()) + return true; + if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX()) + return true; + return false; + case 512: + if (Subtarget.hasAVX512()) + return true; + return false; + default: + return false; // Don't have NonTemporal vector memory ops of this size. + } + } + return true; +} + /// Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index c08227b..c5c1150 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1003,12 +1003,31 @@ namespace llvm { /// legal as the hook is used before type legalization. bool isSafeMemOpType(MVT VT) const override; + bool isMemoryAccessFast(EVT VT, Align Alignment) const; + /// Returns true if the target allows unaligned memory accesses of the /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override; + /// This function returns true if the memory access is aligned or if the + /// target allows this specific unaligned memory access. If the access is + /// allowed, the optional final parameter returns a relative speed of the + /// access (as defined by the target). + bool allowsMemoryAccess( + LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, + Align Alignment, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + unsigned *Fast = nullptr) const override; + + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, + const MachineMemOperand &MMO, + unsigned *Fast) const { + return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), + MMO.getAlign(), MMO.getFlags(), Fast); + } + /// Provide custom lowering hooks for some operations. 
/// SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index 17eda59..c2bfcf5 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -113,17 +113,17 @@ define i128 @test_i128_add_add_idx(i128 %x, i128 %y, i128 %z) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %esi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: btl $5, {{[0-9]+}}(%esp) ; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %esi, (%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl %edx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 diff --git a/llvm/test/CodeGen/X86/bswap-wide-int.ll b/llvm/test/CodeGen/X86/bswap-wide-int.ll index 1ba107a..6d5e995 100644 --- a/llvm/test/CodeGen/X86/bswap-wide-int.ll +++ b/llvm/test/CodeGen/X86/bswap-wide-int.ll @@ -71,8 +71,8 @@ define i128 @bswap_i128(i128 %a0) nounwind { ; X86-MOVBE-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-MOVBE-NEXT: movbel %esi, 12(%eax) ; X86-MOVBE-NEXT: movbel %edi, 8(%eax) -; X86-MOVBE-NEXT: movbel %edx, 4(%eax) -; X86-MOVBE-NEXT: movbel %ecx, (%eax) +; X86-MOVBE-NEXT: movbel %ecx, 4(%eax) +; X86-MOVBE-NEXT: movbel %edx, (%eax) ; X86-MOVBE-NEXT: popl %esi ; X86-MOVBE-NEXT: popl %edi ; X86-MOVBE-NEXT: retl $4 diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index bf561a19..36bf74f 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -275,12 +275,12 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-NEXT: testb $64, %cl ; X86-FAST-NEXT: jne .LBB6_1 ; X86-FAST-NEXT: # %bb.2: -; X86-FAST-NEXT: movl %edi, %eax -; X86-FAST-NEXT: movl %esi, %edi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-FAST-NEXT: movl %ebx, %ebp ; X86-FAST-NEXT: movl %edx, %ebx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: movl %edi, %eax +; X86-FAST-NEXT: movl %esi, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: je .LBB6_5 ; X86-FAST-NEXT: .LBB6_4: @@ -324,72 +324,72 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi ; X86-SLOW-NEXT: pushl %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: testb $64, %al ; X86-SLOW-NEXT: jne .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %ebp, %ecx -; X86-SLOW-NEXT: movl %edi, %ebp +; X86-SLOW-NEXT: movl %edx, %ebp +; X86-SLOW-NEXT: movl %edi, %edx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SLOW-NEXT: movl %edx, %ebx -; X86-SLOW-NEXT: movl %esi, %edx +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: movl %esi, %ebx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SLOW-NEXT: testb $32, %al ; X86-SLOW-NEXT: je .LBB6_5 ; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill -; 
X86-SLOW-NEXT: movl %ebp, %esi -; X86-SLOW-NEXT: movl %edx, %ebp +; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %ebx, %edi +; X86-SLOW-NEXT: movl %edx, %ebx ; X86-SLOW-NEXT: movl %ecx, %edx ; X86-SLOW-NEXT: jmp .LBB6_6 ; X86-SLOW-NEXT: .LBB6_1: -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SLOW-NEXT: testb $32, %al ; X86-SLOW-NEXT: jne .LBB6_4 ; X86-SLOW-NEXT: .LBB6_5: -; X86-SLOW-NEXT: movl %ecx, %ebx -; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %ecx, %ebp +; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: movl %edx, %edi +; X86-SLOW-NEXT: movl %edx, %esi ; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %edi -; X86-SLOW-NEXT: shrl %ebx +; X86-SLOW-NEXT: shll %cl, %esi +; X86-SLOW-NEXT: shrl %ebp ; X86-SLOW-NEXT: movb %al, %ch ; X86-SLOW-NEXT: notb %ch ; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %ebx -; X86-SLOW-NEXT: orl %edi, %ebx -; X86-SLOW-NEXT: movl %ebp, %edi +; X86-SLOW-NEXT: shrl %cl, %ebp +; X86-SLOW-NEXT: orl %esi, %ebp +; X86-SLOW-NEXT: movl %ebx, %esi ; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: shll %cl, %esi ; X86-SLOW-NEXT: shrl %edx ; X86-SLOW-NEXT: movb %ch, %cl ; X86-SLOW-NEXT: shrl %cl, %edx -; X86-SLOW-NEXT: orl %edi, %edx -; X86-SLOW-NEXT: movl %esi, %edi +; X86-SLOW-NEXT: orl %esi, %edx +; X86-SLOW-NEXT: movl %edi, %esi ; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: shll %cl, %edi -; X86-SLOW-NEXT: shrl %ebp +; X86-SLOW-NEXT: shll %cl, %esi +; X86-SLOW-NEXT: shrl %ebx ; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %ebp -; X86-SLOW-NEXT: orl %edi, %ebp +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: orl %esi, %ebx ; X86-SLOW-NEXT: movb %al, %cl ; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: shrl %esi +; X86-SLOW-NEXT: shrl %edi ; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %esi -; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: shrl %cl, %edi +; X86-SLOW-NEXT: orl %eax, %edi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movl %esi, 12(%eax) -; X86-SLOW-NEXT: movl %ebp, 8(%eax) +; X86-SLOW-NEXT: movl %edi, 12(%eax) +; X86-SLOW-NEXT: movl %ebx, 8(%eax) ; X86-SLOW-NEXT: movl %edx, 4(%eax) -; X86-SLOW-NEXT: movl %ebx, (%eax) +; X86-SLOW-NEXT: movl %ebp, (%eax) ; X86-SLOW-NEXT: addl $4, %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll index eb1f040..367a3dd 100644 --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -263,20 +263,20 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-NEXT: pushl %esi ; X86-FAST-NEXT: pushl %eax ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-FAST-NEXT: testb $64, %cl ; X86-FAST-NEXT: je .LBB6_1 ; X86-FAST-NEXT: # %bb.2: +; X86-FAST-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-FAST-NEXT: movl %esi, %edx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-FAST-NEXT: movl %edi, %ebp ; X86-FAST-NEXT: movl %ebx, %edi ; X86-FAST-NEXT: movl 
{{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl %esi, (%esp) # 4-byte Spill -; X86-FAST-NEXT: movl %edx, %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: je .LBB6_4 ; X86-FAST-NEXT: jmp .LBB6_5 @@ -287,20 +287,20 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: jne .LBB6_5 ; X86-FAST-NEXT: .LBB6_4: -; X86-FAST-NEXT: movl %edx, %ebx -; X86-FAST-NEXT: movl %edi, %edx -; X86-FAST-NEXT: movl %esi, %edi -; X86-FAST-NEXT: movl %ebp, %esi +; X86-FAST-NEXT: movl %esi, %ebx +; X86-FAST-NEXT: movl %edi, %esi +; X86-FAST-NEXT: movl %edx, %edi +; X86-FAST-NEXT: movl %ebp, %edx ; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload ; X86-FAST-NEXT: .LBB6_5: -; X86-FAST-NEXT: shrdl %cl, %esi, %ebp -; X86-FAST-NEXT: shrdl %cl, %edi, %esi -; X86-FAST-NEXT: shrdl %cl, %edx, %edi +; X86-FAST-NEXT: shrdl %cl, %edx, %ebp +; X86-FAST-NEXT: shrdl %cl, %edi, %edx +; X86-FAST-NEXT: shrdl %cl, %esi, %edi ; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-FAST-NEXT: shrdl %cl, %ebx, %edx -; X86-FAST-NEXT: movl %edx, 12(%eax) +; X86-FAST-NEXT: shrdl %cl, %ebx, %esi +; X86-FAST-NEXT: movl %esi, 12(%eax) ; X86-FAST-NEXT: movl %edi, 8(%eax) -; X86-FAST-NEXT: movl %esi, 4(%eax) +; X86-FAST-NEXT: movl %edx, 4(%eax) ; X86-FAST-NEXT: movl %ebp, (%eax) ; X86-FAST-NEXT: addl $4, %esp ; X86-FAST-NEXT: popl %esi @@ -316,25 +316,25 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi ; X86-SLOW-NEXT: subl $8, %esp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SLOW-NEXT: testb $64, %cl ; X86-SLOW-NEXT: je .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %ebx, %edx -; X86-SLOW-NEXT: movl %edi, %ebx -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SLOW-NEXT: movl %ebp, %eax -; X86-SLOW-NEXT: movl %esi, %ebp -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SLOW-NEXT: movl %ebx, %ebp +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SLOW-NEXT: movl %esi, %edx +; X86-SLOW-NEXT: movl %edi, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-SLOW-NEXT: testb $32, %cl ; X86-SLOW-NEXT: jne .LBB6_5 ; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %esi, %edi -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %ebx, %edi +; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: movl %ebp, %esi ; X86-SLOW-NEXT: movl %edx, %ebp ; X86-SLOW-NEXT: movl %eax, %edx @@ -345,8 +345,7 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: testb $32, %cl ; X86-SLOW-NEXT: je .LBB6_4 ; X86-SLOW-NEXT: .LBB6_5: -; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl %ebx, %esi +; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: .LBB6_6: ; X86-SLOW-NEXT: shrl %cl, %edx ; X86-SLOW-NEXT: movl %ecx, %ebx diff --git a/llvm/test/CodeGen/X86/i128-add.ll b/llvm/test/CodeGen/X86/i128-add.ll index b033fc1..2849e44 100644 --- a/llvm/test/CodeGen/X86/i128-add.ll +++ b/llvm/test/CodeGen/X86/i128-add.ll @@ -14,16 +14,16 @@ define i128 @add_i128(i128 %x, i128 %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %esi ; X86-NEXT: adcl 
{{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl $1, %esi ; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %esi, (%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl %edx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 @@ -55,16 +55,16 @@ define <1 x i128> @add_v1i128(<1 x i128> %x, <1 x i128> %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: addl {{[0-9]+}}(%esp), %esi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edi -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: addl $1, %esi ; X86-NEXT: adcl $0, %edi -; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx +; X86-NEXT: adcl $0, %edx ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %esi, (%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl %edx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl $4 diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll index 38815d9..7482de0 100644 --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -75,12 +75,12 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %ecx, %edx ; X86-NEXT: orl %eax, %edx -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: shldl $15, %edx, %eax +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: shldl $15, %edx, %ecx ; X86-NEXT: sete %al ; X86-NEXT: retl ; @@ -100,12 +100,12 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %ecx, %edx ; X86-NEXT: orl %eax, %edx -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: shldl $15, %edx, %eax +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: shldl $15, %edx, %ecx ; X86-NEXT: setne %al ; X86-NEXT: retl ; @@ -173,22 +173,22 @@ define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl $17, %esi, %edx -; X86-NEXT: shldl $17, %ecx, %esi +; X86-NEXT: shldl $17, %edx, %esi +; X86-NEXT: shldl $17, %ecx, %edx ; X86-NEXT: shldl $17, %eax, %ecx ; X86-NEXT: shll $17, %eax ; X86-NEXT: movl %ecx, %edi -; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %esi, %edi ; X86-NEXT: movl %eax, %ebx -; X86-NEXT: orl %esi, %ebx +; X86-NEXT: orl %edx, %ebx ; X86-NEXT: orl %edi, %ebx ; X86-NEXT: sete %bl -; X86-NEXT: pushl %edx ; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edx ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %eax ; X86-NEXT: calll use@PLT diff --git a/llvm/test/CodeGen/X86/legalize-shl-vec.ll 
b/llvm/test/CodeGen/X86/legalize-shl-vec.ll index 845fc60..2c24db9 100644 --- a/llvm/test/CodeGen/X86/legalize-shl-vec.ll +++ b/llvm/test/CodeGen/X86/legalize-shl-vec.ll @@ -6,8 +6,8 @@ define <2 x i256> @test_shl(<2 x i256> %In) { ; X32-LABEL: test_shl: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: shldl $2, %ecx, %edx ; X32-NEXT: movl %edx, 60(%eax) ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -48,13 +48,13 @@ define <2 x i256> @test_shl(<2 x i256> %In) { ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; X64-NEXT: shldq $2, %rcx, %rdx -; X64-NEXT: shldq $2, %rdi, %rcx +; X64-NEXT: shldq $2, %rdx, %rcx +; X64-NEXT: shldq $2, %rdi, %rdx ; X64-NEXT: shldq $2, %r9, %rdi ; X64-NEXT: shlq $63, %rsi ; X64-NEXT: shlq $2, %r9 -; X64-NEXT: movq %rdx, 56(%rax) -; X64-NEXT: movq %rcx, 48(%rax) +; X64-NEXT: movq %rcx, 56(%rax) +; X64-NEXT: movq %rdx, 48(%rax) ; X64-NEXT: movq %rdi, 40(%rax) ; X64-NEXT: movq %r9, 32(%rax) ; X64-NEXT: movq %rsi, 24(%rax) @@ -84,36 +84,37 @@ define <2 x i256> @test_srl(<2 x i256> %In) { ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %ebp, %ebx +; X32-NEXT: shldl $28, %edx, %ebx +; X32-NEXT: shldl $28, %esi, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: shldl $28, %ecx, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: shldl $28, %edi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: shldl $28, %esi, %edi -; X32-NEXT: shldl $28, %edx, %esi -; X32-NEXT: shldl $28, %eax, %edx -; X32-NEXT: shldl $28, %ebp, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: shldl $28, %eax, %edi +; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $28, %eax, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: shrdl $4, %eax, %ecx -; X32-NEXT: shrl $4, %ebx +; X32-NEXT: shldl $28, %eax, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: shrdl $4, %eax, %edx +; X32-NEXT: shrl $4, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %ebx, 60(%eax) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebp, 60(%eax) ; X32-NEXT: movl %ebx, 56(%eax) -; X32-NEXT: movl %edi, 52(%eax) -; X32-NEXT: movl %esi, 48(%eax) -; X32-NEXT: movl %edx, 44(%eax) -; X32-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NEXT: movl %edx, 40(%eax) -; X32-NEXT: movl %ebp, 36(%eax) -; X32-NEXT: movl %ecx, 32(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, 52(%eax) +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, 48(%eax) +; X32-NEXT: movl %ecx, 44(%eax) +; X32-NEXT: movl %edi, 40(%eax) +; X32-NEXT: movl %esi, 36(%eax) +; X32-NEXT: movl %edx, 32(%eax) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: shrl $31, %ecx ; X32-NEXT: movl %ecx, (%eax) @@ -143,12 +144,12 
@@ define <2 x i256> @test_srl(<2 x i256> %In) { ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: shrdq $4, %rsi, %r9 -; X64-NEXT: shrdq $4, %rcx, %rsi +; X64-NEXT: shrdq $4, %rdx, %rsi +; X64-NEXT: shrdq $4, %rcx, %rdx ; X64-NEXT: shrq $63, %r8 -; X64-NEXT: shrdq $4, %rdx, %rcx -; X64-NEXT: shrq $4, %rdx -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: movq %rcx, 56(%rdi) +; X64-NEXT: movq %rdx, 48(%rdi) ; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: movq %r8, (%rdi) @@ -178,36 +179,37 @@ define <2 x i256> @test_sra(<2 x i256> %In) { ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: .cfi_offset %ebp, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: movl %ebx, %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-NEXT: movl %ebp, %ebx +; X32-NEXT: shldl $26, %edx, %ebx +; X32-NEXT: shldl $26, %esi, %edx +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: shldl $26, %ecx, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: shldl $26, %edi, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: shldl $26, %esi, %edi -; X32-NEXT: shldl $26, %edx, %esi -; X32-NEXT: shldl $26, %eax, %edx -; X32-NEXT: shldl $26, %ebp, %eax -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: shldl $26, %eax, %edi +; X32-NEXT: movl %eax, %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: shldl $26, %eax, %ebp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: shrdl $6, %eax, %ecx -; X32-NEXT: sarl $6, %ebx +; X32-NEXT: shldl $26, %eax, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: shrdl $6, %eax, %edx +; X32-NEXT: sarl $6, %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %ebx, 60(%eax) -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebp, 60(%eax) ; X32-NEXT: movl %ebx, 56(%eax) -; X32-NEXT: movl %edi, 52(%eax) -; X32-NEXT: movl %esi, 48(%eax) -; X32-NEXT: movl %edx, 44(%eax) -; X32-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NEXT: movl %edx, 40(%eax) -; X32-NEXT: movl %ebp, 36(%eax) -; X32-NEXT: movl %ecx, 32(%eax) +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, 52(%eax) +; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ebx, 48(%eax) +; X32-NEXT: movl %ecx, 44(%eax) +; X32-NEXT: movl %edi, 40(%eax) +; X32-NEXT: movl %esi, 36(%eax) +; X32-NEXT: movl %edx, 32(%eax) ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: sarl $31, %ecx ; X32-NEXT: movl %ecx, 28(%eax) @@ -237,12 +239,12 @@ define <2 x i256> @test_sra(<2 x i256> %In) { ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: shrdq $6, %rsi, %r9 -; X64-NEXT: shrdq $6, %rcx, %rsi +; X64-NEXT: shrdq $6, %rdx, %rsi +; X64-NEXT: shrdq $6, %rcx, %rdx ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: shrdq $6, %rdx, %rcx -; X64-NEXT: sarq $6, %rdx -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: sarq $6, %rcx +; X64-NEXT: movq %rcx, 56(%rdi) +; X64-NEXT: movq %rdx, 48(%rdi) ; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 
32(%rdi) ; X64-NEXT: movq %r8, 24(%rdi) diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll index 93052e4..c080569 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll @@ -48,10 +48,10 @@ define void @merge_2_v4f32_align32(ptr %a0, ptr %a1) nounwind { ; ; X64-AVX1-LABEL: merge_2_v4f32_align32: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; X64-AVX1-NEXT: vmovntdqa (%rdi), %xmm1 -; X64-AVX1-NEXT: vmovntdq %xmm1, (%rsi) -; X64-AVX1-NEXT: vmovntdq %xmm0, 16(%rsi) +; X64-AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; X64-AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; X64-AVX1-NEXT: vmovntdq %xmm0, (%rsi) +; X64-AVX1-NEXT: vmovntdq %xmm1, 16(%rsi) ; X64-AVX1-NEXT: retq ; ; X64-AVX2-LABEL: merge_2_v4f32_align32: diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 25c0719..44a6fad 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=SSE --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=ANY --check-prefix=NO512 --check-prefix=AVXANY --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=ANY --check-prefix=AVXANY --check-prefix=AVX512 --check-prefix=AVX512F @@ -734,63 +734,34 @@ define i32 @eq_i128_pair(ptr %a, ptr %b) { ; if we allowed 2 pairs of 32-byte loads per block. 
define i32 @ne_i256_pair(ptr %a, ptr %b) { -; SSE2-LABEL: ne_i256_pair: -; SSE2: # %bb.0: -; SSE2-NEXT: movq 16(%rdi), %rax -; SSE2-NEXT: movq 24(%rdi), %rcx -; SSE2-NEXT: movq (%rdi), %rdx -; SSE2-NEXT: movq 8(%rdi), %r8 -; SSE2-NEXT: xorq 8(%rsi), %r8 -; SSE2-NEXT: xorq 24(%rsi), %rcx -; SSE2-NEXT: xorq (%rsi), %rdx -; SSE2-NEXT: xorq 16(%rsi), %rax -; SSE2-NEXT: movq 48(%rdi), %r9 -; SSE2-NEXT: movq 32(%rdi), %r10 -; SSE2-NEXT: movq 56(%rdi), %r11 -; SSE2-NEXT: movq 40(%rdi), %rdi -; SSE2-NEXT: xorq 40(%rsi), %rdi -; SSE2-NEXT: orq %r8, %rdi -; SSE2-NEXT: xorq 56(%rsi), %r11 -; SSE2-NEXT: orq %rcx, %r11 -; SSE2-NEXT: orq %rdi, %r11 -; SSE2-NEXT: xorq 32(%rsi), %r10 -; SSE2-NEXT: orq %rdx, %r10 -; SSE2-NEXT: xorq 48(%rsi), %r9 -; SSE2-NEXT: orq %rax, %r9 -; SSE2-NEXT: orq %r10, %r9 -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: orq %r11, %r9 -; SSE2-NEXT: setne %al -; SSE2-NEXT: retq -; -; SSE41-LABEL: ne_i256_pair: -; SSE41: # %bb.0: -; SSE41-NEXT: movq 16(%rdi), %rax -; SSE41-NEXT: movq 24(%rdi), %rcx -; SSE41-NEXT: movq (%rdi), %rdx -; SSE41-NEXT: movq 8(%rdi), %r8 -; SSE41-NEXT: xorq 8(%rsi), %r8 -; SSE41-NEXT: xorq 24(%rsi), %rcx -; SSE41-NEXT: xorq (%rsi), %rdx -; SSE41-NEXT: xorq 16(%rsi), %rax -; SSE41-NEXT: movq 48(%rdi), %r9 -; SSE41-NEXT: movq 32(%rdi), %r10 -; SSE41-NEXT: movq 56(%rdi), %r11 -; SSE41-NEXT: movq 40(%rdi), %rdi -; SSE41-NEXT: xorq 40(%rsi), %rdi -; SSE41-NEXT: orq %r8, %rdi -; SSE41-NEXT: xorq 56(%rsi), %r11 -; SSE41-NEXT: orq %rcx, %r11 -; SSE41-NEXT: orq %rdi, %r11 -; SSE41-NEXT: xorq 32(%rsi), %r10 -; SSE41-NEXT: orq %rdx, %r10 -; SSE41-NEXT: xorq 48(%rsi), %r9 -; SSE41-NEXT: orq %rax, %r9 -; SSE41-NEXT: orq %r10, %r9 -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: orq %r11, %r9 -; SSE41-NEXT: setne %al -; SSE41-NEXT: retq +; SSE-LABEL: ne_i256_pair: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rax +; SSE-NEXT: movq 24(%rdi), %rcx +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: xorq 8(%rsi), %r8 +; SSE-NEXT: xorq 24(%rsi), %rcx +; SSE-NEXT: xorq (%rsi), %rdx +; SSE-NEXT: xorq 16(%rsi), %rax +; SSE-NEXT: movq 48(%rdi), %r9 +; SSE-NEXT: movq 32(%rdi), %r10 +; SSE-NEXT: movq 56(%rdi), %r11 +; SSE-NEXT: movq 40(%rdi), %rdi +; SSE-NEXT: xorq 40(%rsi), %rdi +; SSE-NEXT: orq %r8, %rdi +; SSE-NEXT: xorq 56(%rsi), %r11 +; SSE-NEXT: orq %rcx, %r11 +; SSE-NEXT: orq %rdi, %r11 +; SSE-NEXT: xorq 32(%rsi), %r10 +; SSE-NEXT: orq %rdx, %r10 +; SSE-NEXT: xorq 48(%rsi), %r9 +; SSE-NEXT: orq %rax, %r9 +; SSE-NEXT: orq %r10, %r9 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: setne %al +; SSE-NEXT: retq ; ; AVX1-LABEL: ne_i256_pair: ; AVX1: # %bb.0: @@ -848,63 +819,34 @@ define i32 @ne_i256_pair(ptr %a, ptr %b) { ; if we allowed 2 pairs of 32-byte loads per block. 
define i32 @eq_i256_pair(ptr %a, ptr %b) { -; SSE2-LABEL: eq_i256_pair: -; SSE2: # %bb.0: -; SSE2-NEXT: movq 16(%rdi), %rax -; SSE2-NEXT: movq 24(%rdi), %rcx -; SSE2-NEXT: movq (%rdi), %rdx -; SSE2-NEXT: movq 8(%rdi), %r8 -; SSE2-NEXT: xorq 8(%rsi), %r8 -; SSE2-NEXT: xorq 24(%rsi), %rcx -; SSE2-NEXT: xorq (%rsi), %rdx -; SSE2-NEXT: xorq 16(%rsi), %rax -; SSE2-NEXT: movq 48(%rdi), %r9 -; SSE2-NEXT: movq 32(%rdi), %r10 -; SSE2-NEXT: movq 56(%rdi), %r11 -; SSE2-NEXT: movq 40(%rdi), %rdi -; SSE2-NEXT: xorq 40(%rsi), %rdi -; SSE2-NEXT: orq %r8, %rdi -; SSE2-NEXT: xorq 56(%rsi), %r11 -; SSE2-NEXT: orq %rcx, %r11 -; SSE2-NEXT: orq %rdi, %r11 -; SSE2-NEXT: xorq 32(%rsi), %r10 -; SSE2-NEXT: orq %rdx, %r10 -; SSE2-NEXT: xorq 48(%rsi), %r9 -; SSE2-NEXT: orq %rax, %r9 -; SSE2-NEXT: orq %r10, %r9 -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: orq %r11, %r9 -; SSE2-NEXT: sete %al -; SSE2-NEXT: retq -; -; SSE41-LABEL: eq_i256_pair: -; SSE41: # %bb.0: -; SSE41-NEXT: movq 16(%rdi), %rax -; SSE41-NEXT: movq 24(%rdi), %rcx -; SSE41-NEXT: movq (%rdi), %rdx -; SSE41-NEXT: movq 8(%rdi), %r8 -; SSE41-NEXT: xorq 8(%rsi), %r8 -; SSE41-NEXT: xorq 24(%rsi), %rcx -; SSE41-NEXT: xorq (%rsi), %rdx -; SSE41-NEXT: xorq 16(%rsi), %rax -; SSE41-NEXT: movq 48(%rdi), %r9 -; SSE41-NEXT: movq 32(%rdi), %r10 -; SSE41-NEXT: movq 56(%rdi), %r11 -; SSE41-NEXT: movq 40(%rdi), %rdi -; SSE41-NEXT: xorq 40(%rsi), %rdi -; SSE41-NEXT: orq %r8, %rdi -; SSE41-NEXT: xorq 56(%rsi), %r11 -; SSE41-NEXT: orq %rcx, %r11 -; SSE41-NEXT: orq %rdi, %r11 -; SSE41-NEXT: xorq 32(%rsi), %r10 -; SSE41-NEXT: orq %rdx, %r10 -; SSE41-NEXT: xorq 48(%rsi), %r9 -; SSE41-NEXT: orq %rax, %r9 -; SSE41-NEXT: orq %r10, %r9 -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: orq %r11, %r9 -; SSE41-NEXT: sete %al -; SSE41-NEXT: retq +; SSE-LABEL: eq_i256_pair: +; SSE: # %bb.0: +; SSE-NEXT: movq 16(%rdi), %rax +; SSE-NEXT: movq 24(%rdi), %rcx +; SSE-NEXT: movq (%rdi), %rdx +; SSE-NEXT: movq 8(%rdi), %r8 +; SSE-NEXT: xorq 8(%rsi), %r8 +; SSE-NEXT: xorq 24(%rsi), %rcx +; SSE-NEXT: xorq (%rsi), %rdx +; SSE-NEXT: xorq 16(%rsi), %rax +; SSE-NEXT: movq 48(%rdi), %r9 +; SSE-NEXT: movq 32(%rdi), %r10 +; SSE-NEXT: movq 56(%rdi), %r11 +; SSE-NEXT: movq 40(%rdi), %rdi +; SSE-NEXT: xorq 40(%rsi), %rdi +; SSE-NEXT: orq %r8, %rdi +; SSE-NEXT: xorq 56(%rsi), %r11 +; SSE-NEXT: orq %rcx, %r11 +; SSE-NEXT: orq %rdi, %r11 +; SSE-NEXT: xorq 32(%rsi), %r10 +; SSE-NEXT: orq %rdx, %r10 +; SSE-NEXT: xorq 48(%rsi), %r9 +; SSE-NEXT: orq %rax, %r9 +; SSE-NEXT: orq %r10, %r9 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: orq %r11, %r9 +; SSE-NEXT: sete %al +; SSE-NEXT: retq ; ; AVX1-LABEL: eq_i256_pair: ; AVX1: # %bb.0: @@ -1238,35 +1180,65 @@ define i1 @eq_i256_op(i256 %a, i256 %b) { } define i1 @eq_i512_op(i512 %a, i512 %b) { -; ANY-LABEL: eq_i512_op: -; ANY: # %bb.0: -; ANY-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; ANY-NEXT: movq {{[0-9]+}}(%rsp), %rax -; ANY-NEXT: addq $1, %rdi -; ANY-NEXT: adcq $0, %rsi -; ANY-NEXT: adcq $0, %rdx -; ANY-NEXT: adcq $0, %rcx -; ANY-NEXT: adcq $0, %r8 -; ANY-NEXT: adcq $0, %r9 -; ANY-NEXT: adcq $0, %r10 -; ANY-NEXT: adcq $0, %rax -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rsi -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r9 -; ANY-NEXT: orq %rsi, %r9 -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rcx -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; ANY-NEXT: orq %rcx, %rax -; ANY-NEXT: orq %r9, %rax -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdx -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r10 -; ANY-NEXT: orq %rdx, %r10 -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %r8 -; ANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdi -; ANY-NEXT: orq 
%r8, %rdi -; ANY-NEXT: orq %r10, %rdi -; ANY-NEXT: orq %rax, %rdi -; ANY-NEXT: sete %al -; ANY-NEXT: retq +; SSE-LABEL: eq_i512_op: +; SSE: # %bb.0: +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: addq $1, %rdi +; SSE-NEXT: adcq $0, %rsi +; SSE-NEXT: adcq $0, %rdx +; SSE-NEXT: adcq $0, %rcx +; SSE-NEXT: adcq $0, %r8 +; SSE-NEXT: adcq $0, %r9 +; SSE-NEXT: adcq $0, %r10 +; SSE-NEXT: adcq $0, %rax +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rsi +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r9 +; SSE-NEXT: orq %rsi, %r9 +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rcx +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: orq %r9, %rax +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rdx +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: orq %rdx, %r10 +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %r8 +; SSE-NEXT: xorq {{[0-9]+}}(%rsp), %rdi +; SSE-NEXT: orq %r8, %rdi +; SSE-NEXT: orq %r10, %rdi +; SSE-NEXT: orq %rax, %rdi +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVXANY-LABEL: eq_i512_op: +; AVXANY: # %bb.0: +; AVXANY-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; AVXANY-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVXANY-NEXT: addq $1, %rdi +; AVXANY-NEXT: adcq $0, %rsi +; AVXANY-NEXT: adcq $0, %rdx +; AVXANY-NEXT: adcq $0, %rcx +; AVXANY-NEXT: adcq $0, %r8 +; AVXANY-NEXT: adcq $0, %r9 +; AVXANY-NEXT: adcq $0, %r10 +; AVXANY-NEXT: adcq $0, %rax +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rsi +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r9 +; AVXANY-NEXT: orq %rsi, %r9 +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rcx +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rax +; AVXANY-NEXT: orq %rcx, %rax +; AVXANY-NEXT: orq %r9, %rax +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdx +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r10 +; AVXANY-NEXT: orq %rdx, %r10 +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %r8 +; AVXANY-NEXT: xorq {{[0-9]+}}(%rsp), %rdi +; AVXANY-NEXT: orq %r8, %rdi +; AVXANY-NEXT: orq %r10, %rdi +; AVXANY-NEXT: orq %rax, %rdi +; AVXANY-NEXT: sete %al +; AVXANY-NEXT: retq %a2 = add i512 %a, 1 %r = icmp eq i512 %a2, %b ret i1 %r diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll index e7b318c..f353853 100644 --- a/llvm/test/CodeGen/X86/smin.ll +++ b/llvm/test/CodeGen/X86/smin.ll @@ -158,24 +158,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $8, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %ecx, %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: cmpl %edx, %edi +; X86-NEXT: movl %edx, %eax ; X86-NEXT: cmovbl %edi, %eax ; X86-NEXT: cmpl %esi, %ebp -; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: cmovbl %edi, %ebx ; X86-NEXT: cmovel %eax, %ebx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: cmovbl %ebp, %eax ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %edx, %eax +; X86-NEXT: cmpl %ecx, %edi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: cmovbl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -183,24 +183,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: sbbl %edi, %ebp ; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovll {{[0-9]+}}(%esp), 
%edx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: xorl %edi, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: orl %ebp, %eax -; X86-NEXT: cmovel %ebx, %ecx +; X86-NEXT: cmovel %ebx, %edx ; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl %edi, %eax -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: cmovll %eax, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll index fb7bc96..fbdb6e7 100644 --- a/llvm/test/CodeGen/X86/smul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll @@ -441,8 +441,8 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind { ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ebx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl %eax, %edi diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll index fbbc857..a3d94f7 100644 --- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll @@ -89,8 +89,8 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull %ebx @@ -251,10 +251,10 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { ; X86-NEXT: addl %eax, %esi ; X86-NEXT: adcl %edx, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sarl $31, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: mull %ecx ; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, %ebx ; X86-NEXT: movl %eax, %ebp @@ -585,8 +585,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebp, %eax ; X86-NEXT: mull %ebx @@ -1295,8 +1295,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %edx, %ecx ; X86-NEXT: adcl $0, %esi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: addl %eax, %ecx @@ -1315,9 +1315,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl %edi, %ecx ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; X86-NEXT: movl %edi, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ebx @@ -1379,9 +1379,9 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) { ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; X86-NEXT: adcl %ebp, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload -; X86-NEXT: mull %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: movl %edx, %esi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll index e37950e..0a747b8 100644 --- a/llvm/test/CodeGen/X86/umin.ll +++ b/llvm/test/CodeGen/X86/umin.ll @@ -154,24 +154,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: subl $8, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmpl %ecx, %edi -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: cmpl %edx, %edi +; X86-NEXT: movl %edx, %eax ; X86-NEXT: cmovbl %edi, %eax ; X86-NEXT: cmpl %esi, %ebp -; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: cmovbl %edi, %ebx ; X86-NEXT: cmovel %eax, %ebx ; X86-NEXT: movl %esi, %eax ; X86-NEXT: cmovbl %ebp, %eax ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %edx, %eax +; X86-NEXT: cmpl %ecx, %edi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: cmovbl %edi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi @@ -179,24 +179,24 @@ define i128 @test_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: sbbl %edi, %ebp ; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %eax, %ebp ; X86-NEXT: xorl %edi, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: xorl %edx, %eax +; X86-NEXT: xorl %ecx, %eax ; X86-NEXT: orl %ebp, %eax -; X86-NEXT: cmovel %ebx, %ecx +; X86-NEXT: cmovel %ebx, %edx ; X86-NEXT: cmovel (%esp), %esi # 4-byte Folded Reload ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl %edi, %eax -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: cmovbl %eax, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: addl $8, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git 
a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll index ccd27dd..f5248d8 100644 --- a/llvm/test/CodeGen/X86/umul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll @@ -87,8 +87,8 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ebx, %eax ; X86-NEXT: mull %edi @@ -532,8 +532,8 @@ define i300 @test4(i300 %a, i300 %b) nounwind { ; X64-NEXT: movq %r8, %r11 ; X64-NEXT: movq %rcx, %r8 ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq %rsi, %rax ; X64-NEXT: mulq %r10 ; X64-NEXT: movq %rdx, %rbx diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll index a15d633..189f516 100644 --- a/llvm/test/CodeGen/X86/wide-integer-cmp.ll +++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll @@ -99,8 +99,8 @@ define i32 @test_wide(i128 %a, i128 %b) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: sbbl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: jge .LBB4_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: movl $1, %eax diff --git a/llvm/test/CodeGen/X86/xaluo128.ll b/llvm/test/CodeGen/X86/xaluo128.ll index 977df0f..740a2dd 100644 --- a/llvm/test/CodeGen/X86/xaluo128.ll +++ b/llvm/test/CodeGen/X86/xaluo128.ll @@ -24,13 +24,13 @@ define zeroext i1 @saddoi128(i128 %v1, i128 %v2, ptr %res) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: addl {{[0-9]+}}(%esp), %edi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi ; X86-NEXT: seto %al ; X86-NEXT: movl %edi, (%ecx) ; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl %esi, 8(%ecx) -; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -64,13 +64,13 @@ define zeroext i1 @uaddoi128(i128 %v1, i128 %v2, ptr %res) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: addl {{[0-9]+}}(%esp), %edi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %esi ; X86-NEXT: setb %al ; X86-NEXT: movl %edi, (%ecx) ; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl %esi, 8(%ecx) -; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -105,13 +105,13 @@ define zeroext i1 @ssuboi128(i128 %v1, i128 %v2, ptr %res) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: subl {{[0-9]+}}(%esp), %edi ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi ; X86-NEXT: seto %al ; X86-NEXT: movl %edi, (%ecx) ; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl %esi, 8(%ecx) -; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl 
%edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -145,13 +145,13 @@ define zeroext i1 @usuboi128(i128 %v1, i128 %v2, ptr %res) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: subl {{[0-9]+}}(%esp), %edi ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi ; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi ; X86-NEXT: setb %al ; X86-NEXT: movl %edi, (%ecx) ; X86-NEXT: movl %ebx, 4(%ecx) -; X86-NEXT: movl %esi, 8(%ecx) -; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -- 2.7.4
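
Editor's note, for illustration only and not part of the commit itself: below is a minimal LLVM IR sketch of the situation the commit message describes, loosely modeled on the merge_2_v4f32_align32 test in merge-consecutive-stores-nt.ll touched above. The function name and exact IR are hypothetical, written for this note rather than copied from the patch.

; Two adjacent 16-byte non-temporal loads/stores, where the first access is
; 32-byte aligned. Before this change, allowsMemoryAccess() reported that a
; merged 32-byte non-temporal access was allowed, but without AVX2 there is
; no 256-bit non-temporal load (vmovntdqa with a ymm destination requires
; AVX2), so a merged 32-byte NT load had to be split right back apart during
; legalization.
define void @nt_copy_2_v4f32_align32(ptr %src, ptr %dst) nounwind {
  %src.hi = getelementptr inbounds <4 x float>, ptr %src, i64 1
  %lo = load <4 x float>, ptr %src, align 32, !nontemporal !0
  %hi = load <4 x float>, ptr %src.hi, align 16, !nontemporal !0
  %dst.hi = getelementptr inbounds <4 x float>, ptr %dst, i64 1
  store <4 x float> %lo, ptr %dst, align 32, !nontemporal !0
  store <4 x float> %hi, ptr %dst.hi, align 16, !nontemporal !0
  ret void
}

!0 = !{i32 1}

Compiling IR of this shape with llc for x86_64 and -mattr=avx (AVX1, no AVX2) is the scenario the X64-AVX1 check lines above exercise: with the override in place, the two 16-byte vmovntdqa/vmovntdq pairs are emitted directly instead of first being merged into a 32-byte non-temporal access and then re-split, which is consistent with the reordered X64-AVX1 output in the merge-consecutive-stores-nt.ll diff.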