From e431b280c9aedfd405ec248fbb934bd88863dd2c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Aug 2021 12:30:51 +0100 Subject: [PATCH] [DAG] CombineConsecutiveLoads - replace getABITypeAlign with allowsMemoryAccess (PR45116) One of the cases identified in PR45116 - we don't need to limit load combines (in this case for ISD::BUILD_PAIR) to ABI alignment, we can use allowsMemoryAccess - which tests using getABITypeAlign, but also checks if a target permits (fast) misaligned memory loads by checking allowsMisalignedMemoryAccesses as a fallback. This helps in particular for 32-bit X86 cases loading 64-bit size data, reducing codegen diffs vs x86_64. Differential Revision: https://reviews.llvm.org/D108307 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 17 +- llvm/test/CodeGen/X86/avx512-mask-op.ll | 2 +- .../CodeGen/X86/avx512bw-intrinsics-upgrade.ll | 172 ++++++++------------- .../CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll | 2 +- llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll | 2 +- llvm/test/CodeGen/X86/pr35982.ll | 104 +++++++------ .../X86/vector-shuffle-combining-avx512bw.ll | 8 +- .../X86/vector-shuffle-combining-avx512vbmi.ll | 2 +- llvm/test/CodeGen/X86/xmulo.ll | 12 +- 9 files changed, 148 insertions(+), 173 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 839787e..5a59c50 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12566,18 +12566,15 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); + bool LD1Fast = false; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); - if (DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - Align Alignment = LD1->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign <= Alignment && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Alignment); - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *LD1->getMemOperand(), &LD1Fast) && LD1Fast) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), LD1->getAlign()); return SDValue(); } diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 6ebe6a3..11dee95 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -1271,8 +1271,8 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; ; X86-LABEL: test17: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax ; X86-NEXT: setg %al ; X86-NEXT: kshiftrq $6, %k0, %k1 diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index ac346c5..d47e7be 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -49,8 +49,8 @@ declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) define <64 x i8> @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, 
i64 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1] ; X86-NEXT: vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1] ; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] @@ -109,8 +109,8 @@ define void @test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01] ; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -188,9 +188,9 @@ define <64 x i8> @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] -; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] ; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00] ; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09] ; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] @@ -1937,66 +1937,47 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind { define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind { ; X86-LABEL: test_mask_cmp_b_512: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp # encoding: [0x55] -; X86-NEXT: pushl %ebx # encoding: [0x53] ; X86-NEXT: pushl %edi # encoding: [0x57] ; X86-NEXT: pushl %esi # encoding: [0x56] -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] -; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] -; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] +; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] +; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] +; 
X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] +; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] -; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] -; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] -; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] -; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] -; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] +; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] +; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] +; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] +; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] +; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] +; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] +; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] -; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] -; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] -; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] -; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] -; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] -; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] -; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] +; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] +; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] -; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] -; X86-NEXT: adcl %edi, %ebp # encoding: 
[0x11,0xfd] -; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] +; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] +; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7] +; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] +; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] +; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] -; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] -; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] -; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] +; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c] +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: popl %edi # encoding: [0x5f] -; X86-NEXT: popl %ebx # encoding: [0x5b] -; X86-NEXT: popl %ebp # encoding: [0x5d] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -2131,66 +2112,47 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind { define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind { ; X86-LABEL: test_mask_x86_avx512_ucmp_b_512: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp # encoding: [0x55] -; X86-NEXT: pushl %ebx # encoding: [0x53] ; X86-NEXT: pushl %edi # encoding: [0x57] ; X86-NEXT: pushl %esi # encoding: [0x56] -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] -; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] -; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] +; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] +; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] +; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] +; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] -; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] -; X86-NEXT: kandd %k0, %k2, 
%k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] -; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] -; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] -; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] -; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] +; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] +; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] +; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2] +; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] +; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] +; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] +; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] -; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] -; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] -; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] -; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] -; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] -; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] -; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] -; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] -; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] +; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] +; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] +; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] +; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05] +; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20] ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] -; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] -; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd] -; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06] -; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] -; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] +; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] +; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7] +; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] +; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] +; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] ; X86-NEXT: kmovd 
%k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] -; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] -; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] -; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] -; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] -; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] +; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] +; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] +; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c] +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10] ; X86-NEXT: popl %esi # encoding: [0x5e] ; X86-NEXT: popl %edi # encoding: [0x5f] -; X86-NEXT: popl %ebx # encoding: [0x5b] -; X86-NEXT: popl %ebp # encoding: [0x5d] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll index dc34be5..eb30f0c 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll @@ -283,8 +283,8 @@ declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { ; X86-LABEL: test_mask_compress_store_b_512: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll index c1e1642..e6db088 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll @@ -287,8 +287,8 @@ define <32 x i16> @test_compress_w_512(<32 x i16> %data) { define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { ; X86-LABEL: test_mask_compress_store_b_512: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll index 623fcc6..4a79a10 100644 --- a/llvm/test/CodeGen/X86/pr35982.ll +++ b/llvm/test/CodeGen/X86/pr35982.ll @@ -1,29 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=false | FileCheck %s -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=false | FileCheck %s --check-prefix=NO-POSTRA +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s --check-prefix=POSTRA define float @PR35982_emms(<1 x i64>) 
nounwind { -; CHECK-LABEL: PR35982_emms: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: andl $-8, %esp -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: movl 8(%ebp), %eax -; CHECK-NEXT: movl 12(%ebp), %ecx -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] -; CHECK-NEXT: movd %mm0, %ecx -; CHECK-NEXT: emms -; CHECK-NEXT: movl %eax, (%esp) -; CHECK-NEXT: fildl (%esp) -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %ebp, %esp -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: retl +; NO-POSTRA-LABEL: PR35982_emms: +; NO-POSTRA: # %bb.0: +; NO-POSTRA-NEXT: subl $8, %esp +; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax +; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] +; NO-POSTRA-NEXT: movd %mm0, %ecx +; NO-POSTRA-NEXT: emms +; NO-POSTRA-NEXT: movl %eax, (%esp) +; NO-POSTRA-NEXT: fildl (%esp) +; NO-POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; NO-POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp) +; NO-POSTRA-NEXT: addl $8, %esp +; NO-POSTRA-NEXT: retl +; +; POSTRA-LABEL: PR35982_emms: +; POSTRA: # %bb.0: +; POSTRA-NEXT: subl $8, %esp +; POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax +; POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] +; POSTRA-NEXT: movd %mm0, %ecx +; POSTRA-NEXT: emms +; POSTRA-NEXT: movl %eax, (%esp) +; POSTRA-NEXT: fildl (%esp) +; POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp) +; POSTRA-NEXT: addl $8, %esp +; POSTRA-NEXT: retl %2 = bitcast <1 x i64> %0 to <2 x i32> %3 = extractelement <2 x i32> %2, i32 0 %4 = extractelement <1 x i64> %0, i32 0 @@ -39,27 +47,35 @@ define float @PR35982_emms(<1 x i64>) nounwind { } define float @PR35982_femms(<1 x i64>) nounwind { -; CHECK-LABEL: PR35982_femms: -; CHECK: # %bb.0: -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: andl $-8, %esp -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: movl 8(%ebp), %eax -; CHECK-NEXT: movl 12(%ebp), %ecx -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] -; CHECK-NEXT: movd %mm0, %ecx -; CHECK-NEXT: femms -; CHECK-NEXT: movl %eax, (%esp) -; CHECK-NEXT: fildl (%esp) -; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %ebp, %esp -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: retl +; NO-POSTRA-LABEL: PR35982_femms: +; NO-POSTRA: # %bb.0: +; NO-POSTRA-NEXT: subl $8, %esp +; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax +; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] +; NO-POSTRA-NEXT: movd %mm0, %ecx +; NO-POSTRA-NEXT: femms +; NO-POSTRA-NEXT: movl %eax, (%esp) +; NO-POSTRA-NEXT: fildl (%esp) +; NO-POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; NO-POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp) +; NO-POSTRA-NEXT: addl $8, %esp +; NO-POSTRA-NEXT: retl +; +; POSTRA-LABEL: PR35982_femms: +; POSTRA: # %bb.0: +; POSTRA-NEXT: subl $8, %esp +; POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax +; POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] +; POSTRA-NEXT: movd %mm0, %ecx +; POSTRA-NEXT: femms +; POSTRA-NEXT: movl %eax, (%esp) +; POSTRA-NEXT: fildl (%esp) +; POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; POSTRA-NEXT: fiaddl 
{{[0-9]+}}(%esp) +; POSTRA-NEXT: addl $8, %esp +; POSTRA-NEXT: retl %2 = bitcast <1 x i64> %0 to <2 x i32> %3 = extractelement <2 x i32> %2, i32 0 %4 = extractelement <1 x i64> %0, i32 0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index d511591..442e7c4 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -54,9 +54,9 @@ define <64 x i8> @combine_pshufb_identity(<64 x i8> %x0) { define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) { ; X86-LABEL: combine_pshufb_identity_mask: ; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0] -; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 ; X86-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1} ; X86-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1} @@ -100,8 +100,8 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) { define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) { ; X86-LABEL: combine_pshufb_as_pslldq_mask: ; X86: # %bb.0: -; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53] ; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl ; @@ -126,8 +126,8 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) { define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) { ; X86-LABEL: combine_pshufb_as_psrldq_mask: ; X86: # %bb.0: -; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl ; @@ -156,9 +156,9 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d(<8 x i64> %a0, <8 x i64> %a1 define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64> %a1, i64 %m) { ; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask: ; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = 
<7,0,u,u,5,0,u,u,u,u,12,0,u,u,14,0> ; X86-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 -; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm2[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,20,21,22,23,20,21,22,23,20,21,22,23,20,21,22,23,40,41,42,43,40,41,42,43,40,41,42,43,40,41,42,43,60,61,62,63,60,61,62,63,60,61,62,63,60,61,62,63] ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll index ef7d8fc..86c206c 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll @@ -128,8 +128,8 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d(<8 x i64> %a0, <8 x i64> %a1 define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64> %a1, i64 %m) { ; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask: ; X86: # %bb.0: -; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119] ; X86-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z} ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index 46bd004..4553c54 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -1197,23 +1197,23 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) { ; WIN32-NEXT: testl %esi, %esi ; WIN32-NEXT: setne %dl ; WIN32-NEXT: testl %eax, %eax -; WIN32-NEXT: setne %bl -; WIN32-NEXT: andb %dl, %bl +; WIN32-NEXT: setne %cl +; WIN32-NEXT: andb %dl, %cl ; WIN32-NEXT: mull {{[0-9]+}}(%esp) ; WIN32-NEXT: movl %eax, %edi -; WIN32-NEXT: seto %bh +; WIN32-NEXT: seto %bl ; WIN32-NEXT: movl %esi, %eax ; WIN32-NEXT: mull %ebp ; WIN32-NEXT: movl %eax, %esi -; WIN32-NEXT: seto %cl -; WIN32-NEXT: orb %bh, %cl +; WIN32-NEXT: seto %ch +; WIN32-NEXT: orb %bl, %ch ; WIN32-NEXT: addl %edi, %esi ; WIN32-NEXT: movl %ebp, %eax ; WIN32-NEXT: mull {{[0-9]+}}(%esp) ; WIN32-NEXT: addl %esi, %edx ; WIN32-NEXT: setb %al +; WIN32-NEXT: orb %ch, %al ; WIN32-NEXT: orb %cl, %al -; WIN32-NEXT: orb %bl, %al ; WIN32-NEXT: subb $1, %al ; WIN32-NEXT: je LBB22_1 ; WIN32-NEXT: # %bb.3: # %continue -- 2.7.4
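
Background sketch for the new check: allowsMemoryAccess() first tests the access against getABITypeAlign(), and only when that fails does it fall back to asking the target, via allowsMisalignedMemoryAccesses(), whether the under-aligned access is legal and fast. The code below is a minimal, hypothetical target-side illustration only — MyTargetLowering is not a real in-tree target, and while the hook name and parameters follow TargetLoweringBase of this period, the body is invented for illustration and is not part of this patch.

// Hypothetical target hook, assumed for illustration; not part of this patch.
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  // Consulted by TargetLoweringBase::allowsMemoryAccess() when the
  // combined load is under-aligned for the ABI type alignment.
  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast) const override {
    // Claim that any scalar access of up to 64 bits is legal and fast at
    // any alignment.  With a hook like this, the new
    // TLI.allowsMemoryAccess(..., &LD1Fast) check in
    // CombineConsecutiveLoads succeeds and sets LD1Fast, so the two
    // consecutive loads are merged into one wider load.
    if (!VT.isVector() && VT.getSizeInBits() <= 64) {
      if (Fast)
        *Fast = true;
      return true;
    }
    return false;
  }
};

Note that the combine only fires when the returned Fast flag is true (the "&& LD1Fast" condition in the DAGCombiner hunk above), so targets that merely tolerate slow misaligned accesses keep the previous, ABI-alignment-limited behaviour.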