define <64 x i8> @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; X86: # %bb.0:
-; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
; X86-NEXT: vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_cmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp # encoding: [0x55]
-; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
-; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
-; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
+; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
+; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
-; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
-; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
-; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
-; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
+; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
+; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
+; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
+; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
-; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
-; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
-; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
+; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
-; X86-NEXT: popl %ebx # encoding: [0x5b]
-; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp # encoding: [0x55]
-; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
-; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
-; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
+; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
+; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
-; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
-; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
-; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
-; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
+; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
+; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
+; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
+; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
+; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
-; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
-; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
-; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
+; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
-; X86-NEXT: popl %ebx # encoding: [0x5b]
-; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; X86-LABEL: combine_pshufb_identity_mask:
; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X86-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X86-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
; X86-LABEL: combine_pshufb_as_pslldq_mask:
; X86: # %bb.0:
-; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
; X86-LABEL: combine_pshufb_as_psrldq_mask:
; X86: # %bb.0:
-; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64> %a1, i64 %m) {
; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = <7,0,u,u,5,0,u,u,u,u,12,0,u,u,14,0>
; X86-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm2[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,20,21,22,23,20,21,22,23,20,21,22,23,20,21,22,23,40,41,42,43,40,41,42,43,40,41,42,43,40,41,42,43,60,61,62,63,60,61,62,63,60,61,62,63,60,61,62,63]
; X86-NEXT: retl
;