This implements the main suggested change from issue #56498.
Restricting the shorter (non-extending) load to only
-Oz ("minsize"), rather than to -Os ("optsize") as well, is
left as a possible follow-up.
As noted in the bug report, the zero-extending load may have
shorter latency/better throughput across a wide range of x86
micro-arches, and it avoids a potential false dependency.
The cost is an extra instruction byte.
This could cause performance improvements or regressions from
secondary effects, but I don't think it is possible to account
for those in advance, and they will likely also depend on the
exact micro-architecture.
This does bring LLVM x86 codegen more in line with existing
gcc codegen, so if problems are exposed they are more likely
to occur for both compilers.
Differential Revision: https://reviews.llvm.org/D129775
switch (MI->getOpcode()) {
case X86::MOV8rm:
- // Only replace 8 bit loads with the zero extending versions if
- // in an inner most loop and not optimizing for size. This takes
- // an extra byte to encode, and provides limited performance upside.
- if (MachineLoop *ML = MLI->getLoopFor(&MBB))
- if (ML->begin() == ML->end() && !OptForSize)
- return tryReplaceLoad(X86::MOVZX32rm8, MI);
+ // Replace 8-bit loads with the zero-extending version if not optimizing
+ // for size. The extending op is cheaper across a wide range of uarch and
+ // it avoids a potentially expensive partial register stall. It takes an
+ // extra byte to encode, however, so don't do this when optimizing for size.
+ if (!OptForSize)
+ return tryReplaceLoad(X86::MOVZX32rm8, MI);
break;
case X86::MOV16rm:
define i32 @test5(i32 %B, i8 %C) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl A, %eax
; CHECK-NEXT: shldl %cl, %edx, %eax
; CHECK: # %bb.0:
; CHECK-NEXT: movl A, %eax
; CHECK-NEXT: movzwl 2(%eax), %eax
-; CHECK-NEXT: movb B, %cl
+; CHECK-NEXT: movzbl B, %ecx
; CHECK-NEXT: movl C, %edx
; CHECK-NEXT: andb $16, %cl
; CHECK-NEXT: shll %cl, %edx
; CHECK-NEXT: cmpl $1, %eax
; CHECK-NEXT: ja .LBB0_2
; CHECK-NEXT: # %bb.1: # %bb77
-; CHECK-NEXT: movb 0, %al
-; CHECK-NEXT: movb 0, %al
+; CHECK-NEXT: movzbl 0, %eax
+; CHECK-NEXT: movzbl 0, %eax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: .LBB0_2: # %bb84
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %rbp
; CHECK-NEXT: movq (%rbp), %rax
; CHECK-NEXT: callq *216(%rax)
-; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: je LBB0_11
; CHECK-NEXT: ## %bb.7: ## %cond_false.i
-; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-NEXT: movzbl %bl, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: LBB0_11: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
-; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl
-; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: .cfi_offset %ebp, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.1: ## %bb116.i
; CHECK-NEXT: movl %esp, %edi
; CHECK-NEXT: movl $g1s63, %esi
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
-; CHECK-NEXT: movb g1s63+62, %al
+; CHECK-NEXT: movzbl g1s63+62, %eax
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
; CHECK-NEXT: movzwl g1s63+60, %eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SOURCE-SCHED-NEXT: xorl %ecx, %ecx
; SOURCE-SCHED-NEXT: cmpl $2, %eax
; SOURCE-SCHED-NEXT: setge %cl
-; SOURCE-SCHED-NEXT: movb g_73, %dl
+; SOURCE-SCHED-NEXT: movzbl g_73, %edx
; SOURCE-SCHED-NEXT: xorl %eax, %eax
; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al
; SOURCE-SCHED-NEXT: testb %dl, %dl
-; RUN: llc < %s -mcpu=core2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.4"
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
+; CHECK-LABEL: cli_magic_scandesc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
+; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzbl (%rsp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT: movq (%rdi), %rdx
+; CHECK-NEXT: movq 8(%rdi), %rsi
+; CHECK-NEXT: movq %rdx, (%rsp)
+; CHECK-NEXT: movq 24(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq 16(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq 32(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq 40(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq 48(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq 56(%rdi), %rdx
+; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb %al, (%rsp)
+; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
+; CHECK-NEXT: cmpq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: jne .LBB0_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: callq __stack_chk_fail@PLT
entry:
%a = alloca [64 x i8]
%c = getelementptr inbounds [64 x i8], ptr %a, i64 0, i32 30
store i8 %e, ptr %c, align 8
ret i32 0
}
-
-; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
-; CHECK: movb (%rsp), [[R1:%.+]]
-; CHECK: movb 30(%rsp), [[R0:%.+]]
-; CHECK: movb [[R1]], (%rsp)
-; CHECK: movb [[R0]], 30(%rsp)
-; CHECK: callq ___stack_chk_fail
define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {
; I386-NOCMOV-LABEL: negative_CopyFromReg:
; I386-NOCMOV: # %bb.0:
-; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
+; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
; I386-NOCMOV-NEXT: cmpb %cl, %al
;
; I686-NOCMOV-LABEL: negative_CopyFromReg:
; I686-NOCMOV: # %bb.0:
-; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
+; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
; I686-NOCMOV-NEXT: cmpb %cl, %al
define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
; I386-NOCMOV-LABEL: negative_CopyFromRegs:
; I386-NOCMOV: # %bb.0:
-; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
-; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
+; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NOCMOV-NEXT: jg .LBB4_2
; I386-NOCMOV-NEXT: # %bb.1:
;
; I686-NOCMOV-LABEL: negative_CopyFromRegs:
; I686-NOCMOV: # %bb.0:
-; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
-; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
+; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; I686-NOCMOV-NEXT: cmpb %cl, %al
; I686-NOCMOV-NEXT: jg .LBB4_2
; I686-NOCMOV-NEXT: # %bb.1:
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movb (%eax), %bl
+; X32-NEXT: movzbl (%eax), %ebx
; X32-NEXT: movzbl %bl, %esi
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: calll take_char
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: .cfi_offset %rbx, -16
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: movzbl %al, %ebx
; X64-NEXT: movl %ebx, %edi
; X64-NEXT: callq take_char
; CHECK-LABEL: test_load_i1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl 4(%esp), %eax
-; CHECK-NEXT: movb (%eax), %al
+; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: retl
%r = load i1, ptr %p1
ret i1 %r
; CHECK-LABEL: test_load_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl 4(%esp), %eax
-; CHECK-NEXT: movb (%eax), %al
+; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: retl
%r = load i8, ptr %p1
ret i8 %r
define i1 @test_load_i1(ptr %p1) {
; ALL-LABEL: test_load_i1:
; ALL: # %bb.0:
-; ALL-NEXT: movb (%rdi), %al
+; ALL-NEXT: movzbl (%rdi), %eax
; ALL-NEXT: retq
%r = load i1, ptr %p1
ret i1 %r
define i8 @test_load_i8(ptr %p1) {
; ALL-LABEL: test_load_i8:
; ALL: # %bb.0:
-; ALL-NEXT: movb (%rdi), %al
+; ALL-NEXT: movzbl (%rdi), %eax
; ALL-NEXT: retq
%r = load i8, ptr %p1
ret i8 %r
; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12
; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12
; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8
-; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al
+; CHECK-MINGW-X86-NEXT: movzbl __ZGVZ2amiE2au, %eax
; CHECK-MINGW-X86-NEXT: testb %al, %al
; CHECK-MINGW-X86-NEXT: jne LBB0_4
; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check
;
; X86-LABEL: test_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
; X86-NEXT: xorb %cl, %al
; X86-NEXT: xorb %al, %bh
; X86-NEXT: subb %al, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %al, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, %ah
; X86-NEXT: sarb $7, %ah
; X86-NEXT: xorb %ah, %al
; X86-NEXT: movb %dh, 11(%esi)
; X86-NEXT: movb %bl, 10(%esi)
; X86-NEXT: movb %bh, 9(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 8(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 7(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 6(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 5(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 4(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 3(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 2(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, 1(%esi)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: movb %al, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: addl $12, %esp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-LABEL: test_i32_sub_add_sext_var:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: sarl $31, %edx
define i8 @foo(ptr %V) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb 2(%rdi), %al
+; CHECK-NEXT: movzbl 2(%rdi), %eax
; CHECK-NEXT: andb $95, %al
; CHECK-NEXT: retq
%V3i8 = load <3 x i8>, ptr %V, align 4
; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: je .LBB1_5
; CHECK-NEXT: # %bb.1: # %bb0.preheader
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_2: # %bb0
define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: and_i8_ri:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $-17, %cl
; X86-NEXT: je .LBB0_2
define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: and_i8_rr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb %al, %cl
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
; CHECK-LABEL: use_i3:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: andb $7, %al
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp)
; CHECK-NEXT: cmpb %cl, %al
; RUN: llc < %s -mtriple=i686-- -mcpu=atom | FileCheck %s
; CHECK: movl
-; CHECK: movb
-; CHECK: movb
+; CHECK: movzbl
+; CHECK: movzbl
; CHECK: cmpb
; CHECK: notb
; CHECK: notb
; X64-LABEL: add8:
; X64: # %bb.0:
; X64-NEXT: mfence
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: retq
;
; X86-SSE2-LABEL: add8:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: mfence
-; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: movzbl (%eax), %eax
; X86-SSE2-NEXT: retl
;
; X86-SLM-LABEL: add8:
;
; X32-LABEL: add_8r:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addb %al, (%ecx)
; X32-NEXT: retl
;
; X32-LABEL: sub_8r:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: subb %al, (%ecx)
; X32-NEXT: retl
;
; X32-LABEL: and_8r:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: andb %al, (%ecx)
; X32-NEXT: retl
;
; X32-LABEL: or_8r:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: orb %al, (%ecx)
; X32-NEXT: retl
;
; X32-LABEL: xor_8r:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorb %al, (%ecx)
; X32-NEXT: retl
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefixes=CHECK,CHECK-O3 %s
define i8 @load_i8(ptr %ptr) {
-; CHECK-LABEL: load_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb (%rdi), %al
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
%v = load atomic i8, ptr %ptr monotonic, align 1
ret i8 %v
}
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
define i8 @load_i8(i8* %ptr) {
-; CHECK-LABEL: load_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movb (%rdi), %al
-; CHECK-NEXT: retq
+; CHECK-O0-LABEL: load_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
%v = load atomic i8, i8* %ptr unordered, align 1
ret i8 %v
}
; CHECK-NEXT: movl %eax, 24(%rdi)
; CHECK-NEXT: movzwl -4(%rdi), %eax
; CHECK-NEXT: movw %ax, 28(%rdi)
-; CHECK-NEXT: movb -2(%rdi), %al
+; CHECK-NEXT: movzbl -2(%rdi), %eax
; CHECK-NEXT: movb %al, 30(%rdi)
-; CHECK-NEXT: movb -1(%rdi), %al
+; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 28(%rdi)
-; CHECK-AVX2-NEXT: movb -2(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
-; CHECK-AVX2-NEXT: movb -1(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 28(%rdi)
-; CHECK-AVX512-NEXT: movb -2(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
-; CHECK-AVX512-NEXT: movb -1(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
; CHECK-NEXT: movw %ax, 24(%rdi)
; CHECK-NEXT: movl -6(%rdi), %eax
; CHECK-NEXT: movl %eax, 26(%rdi)
-; CHECK-NEXT: movb -2(%rdi), %al
+; CHECK-NEXT: movzbl -2(%rdi), %eax
; CHECK-NEXT: movb %al, 30(%rdi)
-; CHECK-NEXT: movb -1(%rdi), %al
+; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; CHECK-AVX2-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 26(%rdi)
-; CHECK-AVX2-NEXT: movb -2(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
-; CHECK-AVX2-NEXT: movb -1(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 26(%rdi)
-; CHECK-AVX512-NEXT: movb -2(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
-; CHECK-AVX512-NEXT: movb -1(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
; CHECK-NEXT: movl $0, -11(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, 16(%rdi)
-; CHECK-NEXT: movb -12(%rdi), %al
+; CHECK-NEXT: movzbl -12(%rdi), %eax
; CHECK-NEXT: movb %al, 20(%rdi)
; CHECK-NEXT: movl -11(%rdi), %eax
; CHECK-NEXT: movl %eax, 21(%rdi)
; CHECK-NEXT: movl %eax, 25(%rdi)
; CHECK-NEXT: movzwl -3(%rdi), %eax
; CHECK-NEXT: movw %ax, 29(%rdi)
-; CHECK-NEXT: movb -1(%rdi), %al
+; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; CHECK-AVX2-NEXT: movl $0, -11(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
-; CHECK-AVX2-NEXT: movb -12(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 20(%rdi)
; CHECK-AVX2-NEXT: movl -11(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX2-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 29(%rdi)
-; CHECK-AVX2-NEXT: movb -1(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-NEXT: movl $0, -11(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
-; CHECK-AVX512-NEXT: movb -12(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 20(%rdi)
; CHECK-AVX512-NEXT: movl -11(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX512-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 29(%rdi)
-; CHECK-AVX512-NEXT: movb -1(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
; CHECK-NEXT: movb $0, -11(%rdi)
; CHECK-NEXT: movzwl -16(%rdi), %eax
; CHECK-NEXT: movw %ax, 16(%rdi)
-; CHECK-NEXT: movb -14(%rdi), %al
+; CHECK-NEXT: movzbl -14(%rdi), %eax
; CHECK-NEXT: movb %al, 18(%rdi)
; CHECK-NEXT: movzwl -13(%rdi), %eax
; CHECK-NEXT: movw %ax, 19(%rdi)
-; CHECK-NEXT: movb -11(%rdi), %al
+; CHECK-NEXT: movzbl -11(%rdi), %eax
; CHECK-NEXT: movb %al, 21(%rdi)
; CHECK-NEXT: movq -10(%rdi), %rax
; CHECK-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX2-NEXT: movb $0, -11(%rdi)
; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 16(%rdi)
-; CHECK-AVX2-NEXT: movb -14(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -14(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 18(%rdi)
; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 19(%rdi)
-; CHECK-AVX2-NEXT: movb -11(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl -11(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 21(%rdi)
; CHECK-AVX2-NEXT: movq -10(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX512-NEXT: movb $0, -11(%rdi)
; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 16(%rdi)
-; CHECK-AVX512-NEXT: movb -14(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -14(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 18(%rdi)
; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 19(%rdi)
-; CHECK-AVX512-NEXT: movb -11(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl -11(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 21(%rdi)
; CHECK-AVX512-NEXT: movq -10(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 22(%rdi)
; CHECK-NEXT: .LBB5_2: # %if.end
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: movq %rax, (%rsi)
-; CHECK-NEXT: movb 8(%rdi), %al
+; CHECK-NEXT: movzbl 8(%rdi), %eax
; CHECK-NEXT: movb %al, 8(%rsi)
; CHECK-NEXT: movl 9(%rdi), %eax
; CHECK-NEXT: movl %eax, 9(%rsi)
; CHECK-NEXT: movzwl 13(%rdi), %eax
; CHECK-NEXT: movw %ax, 13(%rsi)
-; CHECK-NEXT: movb 15(%rdi), %al
+; CHECK-NEXT: movzbl 15(%rdi), %eax
; CHECK-NEXT: movb %al, 15(%rsi)
; CHECK-NEXT: retq
;
; CHECK-AVX2-NEXT: .LBB5_2: # %if.end
; CHECK-AVX2-NEXT: movq (%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rsi)
-; CHECK-AVX2-NEXT: movb 8(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl 8(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 8(%rsi)
; CHECK-AVX2-NEXT: movl 9(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 13(%rsi)
-; CHECK-AVX2-NEXT: movb 15(%rdi), %al
+; CHECK-AVX2-NEXT: movzbl 15(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 15(%rsi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-NEXT: .LBB5_2: # %if.end
; CHECK-AVX512-NEXT: movq (%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rsi)
-; CHECK-AVX512-NEXT: movb 8(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl 8(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 8(%rsi)
; CHECK-AVX512-NEXT: movl 9(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 9(%rsi)
; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 13(%rsi)
-; CHECK-AVX512-NEXT: movb 15(%rdi), %al
+; CHECK-AVX512-NEXT: movzbl 15(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 15(%rsi)
; CHECK-AVX512-NEXT: retq
entry:
; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
; KNL-NEXT: cmovnel %eax, %r10d
; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k1
; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kandw %k7, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k1
; KNL-NEXT: movw $-1025, %di ## imm = 0xFBFF
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: movw $-2049, %di ## imm = 0xF7FF
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k1
; KNL-NEXT: movw $-4097, %di ## imm = 0xEFFF
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k1
; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $2, %k5, %k5
; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kandw %k0, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kshiftlw $1, %k5, %k5
; KNL-NEXT: kshiftrw $1, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $8, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $7, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kandw %k7, %k5, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $6, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kandw %k4, %k5, %k4
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $5, %k5, %k5
; KNL-NEXT: korw %k5, %k4, %k4
; KNL-NEXT: kandw %k3, %k4, %k3
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $4, %k4, %k4
; KNL-NEXT: korw %k4, %k3, %k3
; KNL-NEXT: kandw %k2, %k3, %k2
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $3, %k3, %k3
; KNL-NEXT: korw %k3, %k2, %k2
; KNL-NEXT: kandw %k1, %k2, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $2, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k0, %k1, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: cmovnel %edx, %ecx
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: cmovnel %edx, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: kmovw %edx, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $14, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $13, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $12, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $11, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $10, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $9, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $8, %k1, %k1
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $7, %k1, %k1
; KNL_X32-NEXT: movw $-513, %dx ## imm = 0xFDFF
; KNL_X32-NEXT: kmovw %edx, %k7
; KNL_X32-NEXT: kandw %k7, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $6, %k1, %k1
; KNL_X32-NEXT: movw $-1025, %dx ## imm = 0xFBFF
; KNL_X32-NEXT: kmovw %edx, %k4
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $5, %k1, %k1
; KNL_X32-NEXT: movw $-2049, %dx ## imm = 0xF7FF
; KNL_X32-NEXT: kmovw %edx, %k3
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $4, %k1, %k1
; KNL_X32-NEXT: movw $-4097, %dx ## imm = 0xEFFF
; KNL_X32-NEXT: kmovw %edx, %k2
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $3, %k1, %k1
; KNL_X32-NEXT: movw $-8193, %dx ## imm = 0xDFFF
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $2, %k5, %k5
; KNL_X32-NEXT: movw $-16385, %dx ## imm = 0xBFFF
; KNL_X32-NEXT: kmovw %edx, %k0
; KNL_X32-NEXT: kandw %k0, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $14, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kshiftlw $1, %k5, %k5
; KNL_X32-NEXT: kshiftrw $1, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw %k5, (%esp) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; KNL_X32-NEXT: kmovw %ebx, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $14, %k5, %k5
; KNL_X32-NEXT: korw %k5, %k6, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $8, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k6, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $7, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kandw %k7, %k5, %k5
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $6, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k5, %k5
; KNL_X32-NEXT: kandw %k4, %k5, %k4
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $5, %k5, %k5
; KNL_X32-NEXT: korw %k5, %k4, %k4
; KNL_X32-NEXT: kandw %k3, %k4, %k3
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $4, %k4, %k4
; KNL_X32-NEXT: korw %k4, %k3, %k3
; KNL_X32-NEXT: kandw %k2, %k3, %k2
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $3, %k3, %k3
; KNL_X32-NEXT: korw %k3, %k2, %k2
; KNL_X32-NEXT: kandw %k1, %k2, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $2, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: kandw %k0, %k1, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $14, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $1, %k0, %k0
; KNL_X32-NEXT: kshiftrw $1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: movw $-5, %di
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: movw $-9, %di
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $12, %k3, %k3
; KNL-NEXT: movw $-17, %di
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $11, %k4, %k4
; KNL-NEXT: movw $-33, %di
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $10, %k5, %k5
; KNL-NEXT: movw $-65, %di
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT: andl $1, %r10d
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT: andl $1, %r10d
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT: andl $1, %r10d
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT: andl $1, %r10d
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT: andl $1, %r10d
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k7, %k7
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $10, %k7, %k7
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $9, %k7, %k7
; KNL-NEXT: korw %k7, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: andl $1, %ecx
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; KNL-NEXT: kmovw %edx, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $14, %k7, %k7
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kandw %k1, %k6, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k2, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k3, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k4, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k5, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %ebx
; KNL_X32-NEXT: subl $16, %esp
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
; KNL_X32-NEXT: kmovw %eax, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: kshiftrw $14, %k1, %k1
; KNL_X32-NEXT: movw $-5, %ax
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $13, %k2, %k2
; KNL_X32-NEXT: movw $-9, %ax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $12, %k3, %k3
; KNL_X32-NEXT: movw $-17, %ax
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $11, %k4, %k4
; KNL_X32-NEXT: movw $-33, %ax
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $10, %k5, %k5
; KNL_X32-NEXT: movw $-65, %ax
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
; KNL_X32-NEXT: kmovw %eax, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $14, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kandw %k1, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $13, %k7, %k7
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: kandw %k2, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $12, %k7, %k7
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $11, %k7, %k7
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $10, %k7, %k7
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $9, %k7, %k7
; KNL_X32-NEXT: korw %k7, %k0, %k0
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andl $1, %eax
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: kmovw %ecx, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $14, %k7, %k7
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: korw %k7, %k6, %k6
; KNL_X32-NEXT: kandw %k1, %k6, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k6, %k1, %k1
; KNL_X32-NEXT: kandw %k2, %k1, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $12, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: kandw %k3, %k1, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $11, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: kandw %k4, %k1, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $10, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: kandw %k5, %k1, %k1
-; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $9, %k2, %k2
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL: # %bb.0:
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k7
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k2
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k3
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k4
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k1
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k0, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
; KNL-NEXT: kandw %k2, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k3, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $8, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; KNL-NEXT: kandw %k3, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $7, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k4, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $6, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
; KNL-NEXT: kandw %k4, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; KNL-NEXT: kandw %k5, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $4, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; KNL-NEXT: kandw %k6, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $3, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; KNL-NEXT: kandw %k6, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $2, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; KNL-NEXT: kandw %k6, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: korw %k1, %k6, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; KNL-NEXT: kandw %k6, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k7, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k0, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
; KNL-NEXT: kandw %k0, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k2, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
; KNL-NEXT: kandw %k0, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $8, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k3, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $7, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; KNL-NEXT: kandw %k3, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $6, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k4, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kandw %k5, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $4, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
; KNL-NEXT: kandw %k0, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $3, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
; KNL-NEXT: kandw %k2, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $2, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k7, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $13, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $12, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $11, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $10, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $9, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $8, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $7, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kandw %k3, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $6, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kandw %k4, %k6, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; KNL-NEXT: kandw %k3, %k5, %k4
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $4, %k5, %k5
; KNL-NEXT: korw %k5, %k4, %k4
; KNL-NEXT: kandw %k0, %k4, %k3
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $3, %k4, %k4
; KNL-NEXT: korw %k4, %k3, %k3
; KNL-NEXT: kandw %k2, %k3, %k2
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k3
; KNL-NEXT: korw %k3, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload
; KNL-NEXT: kandw %k0, %k2, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: korw %k2, %k0, %k2
;
; AVX512DQNOBW-LABEL: test21:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: andl $1, %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k1, %k7
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k1, %k2
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k1, %k3
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k1, %k4
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1
; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: andl $1, %eax
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX512DQNOBW-NEXT: kmovw %ecx, %k0
; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: andl $1, %eax
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX512DQNOBW-NEXT: kmovw %ecx, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k7
; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7
; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6
; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k6
; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k3, %k5, %k4
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k5
; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5
; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5
; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4
; AVX512DQNOBW-NEXT: kandw %k1, %k4, %k3
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k4
; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4
; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3
; AVX512DQNOBW-NEXT: kandw %k2, %k3, %k2
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k3
; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3
; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2
; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload
; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
-; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQNOBW-NEXT: kmovw %eax, %k2
; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1
define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) {
; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512: # %bb.0:
-; AVX512-NEXT: movb (%rdi), %al
+; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: shrb %al
; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: testb $1, %al
;
; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store:
; AVX512NOTDQ: # %bb.0:
-; AVX512NOTDQ-NEXT: movb (%rdi), %al
+; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax
; AVX512NOTDQ-NEXT: shrb %al
; AVX512NOTDQ-NEXT: xorl %ecx, %ecx
; AVX512NOTDQ-NEXT: testb $1, %al
define i16 @test16(ptr%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL: ## %bb.0:
-; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movw $-1025, %cx ## imm = 0xFBFF
; KNL-NEXT: kmovw %ecx, %k1
define i8 @test17(ptr%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL: ## %bb.0:
-; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movw $-17, %cx
; KNL-NEXT: kmovw %ecx, %k1
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
-; CHECK-NEXT: movb -24(%rsp,%rdi), %al
+; CHECK-NEXT: movzbl -24(%rsp,%rdi), %eax
; CHECK-NEXT: retq
%t2 = extractelement <16 x i8> %t1, i32 %index
ret i8 %t2
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: andl $31, %edi
-; CHECK-NEXT: movb (%rsp,%rdi), %al
+; CHECK-NEXT: movzbl (%rsp,%rdi), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: andl $63, %edi
-; CHECK-NEXT: movb (%rsp,%rdi), %al
+; CHECK-NEXT: movzbl (%rsp,%rdi), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vmovaps %zmm0, (%rsp)
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: andl $63, %eax
-; CHECK-NEXT: movb (%rsp,%rax), %al
+; CHECK-NEXT: movzbl (%rsp,%rax), %eax
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: vzeroupper
define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2
define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2
define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_add_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_add_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_add_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_add_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_sub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_sub_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_sub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_sub_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_mul_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_mul_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_mul_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_mul_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_div_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_div_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_div_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_div_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <8 x double> @test_mm512_mask_shuffle_f64x2(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_shuffle_f64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_shuffle_f64x2(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_shuffle_f64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_shuffle_i64x2(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_mask_shuffle_i64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_shuffle_i64x2(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_maskz_shuffle_i64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X86-NEXT: retl
define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm512_mask_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm512_maskz_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: kmovw %eax, %k1
define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastq_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastq_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
; X86-LABEL: test_mm512_mask_broadcastsd_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastsd_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_movedup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_mask_movedup_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_movedup_pd(i8 %a0, <8 x double> %a1) {
; X86-LABEL: test_mm512_maskz_movedup_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_mask_permute_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
; X86-LABEL: test_mm512_maskz_permute_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_mask_permutex_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_permutex_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_mask_permutex_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
; X86-LABEL: test_mm512_maskz_permutex_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X86-LABEL: test_mm512_mask_shuffle_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_maskz_shuffle_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_pd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
; X86-LABEL: test_mm512_maskz_mul_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
; X86-LABEL: test_mm512_mask_mul_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %zmm0, %zmm1, %zmm2 {%k1}
; X86-NEXT: vmovdqa64 %zmm2, %zmm0
define <8 x i64> @test_mm512_maskz_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
; X86-LABEL: test_mm512_maskz_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
; X86-LABEL: test_mm512_mask_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 {%k1}
; X86-NEXT: vmovdqa64 %zmm2, %zmm0
define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind {
; X86-LABEL: test_mm512_set1_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd %eax, %xmm0
; X86-NEXT: vpbroadcastb %xmm0, %ymm0
; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
define <8 x double> @test_mm512_mask_cvtps_pd(<8 x double> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm512_mask_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_cvtpslo_pd(<8 x double> %__W, i8 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_cvtpslo_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_cvtps_pd(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm512_maskz_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %ymm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi64_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi64_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi64_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi64_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_mask_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_maskz_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask2_permutex2var_pd(<8 x double> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovapd %zmm1, %zmm0
define <8 x i64> @test_mm512_mask2_permutex2var_epi64(<8 x i64> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovdqa64 %zmm1, %zmm0
define <8 x double> @test_mm512_mask_permutex2var_pd(<8 x double> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_permutex2var_pd(i8 zeroext %__U, <8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_permutex2var_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_permutex2var_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_add_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_add_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_add_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_add_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_sub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_sub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_sub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_sub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_mul_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_mul_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_mul_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_mul_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_div_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_div_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_div_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_div_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fnmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fnmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fnmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fnmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmaddsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fmaddsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmsubadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fmsubadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovapd %zmm2, %zmm0
define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl
define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0
define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovaps %xmm2, %xmm0
define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-LABEL: test_mm512_mask_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm512_maskz_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm512_mask_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm512_maskz_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
define void @test_mm512_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcompresspd %zmm0, (%ecx) {%k1}
define void @test_mm512_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpcompressq %zmm0, (%ecx) {%k1}
define i64 @test_mm512_mask_reduce_add_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_add_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_mul_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]
; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_and_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_and_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_or_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_or_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movb 8(%ebp), %al
+; X86-NEXT: movzbl 8(%ebp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movb 8(%ebp), %al
+; X86-NEXT: movzbl 8(%ebp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,2,3]
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movb 8(%ebp), %al
+; X86-NEXT: movzbl 8(%ebp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647]
; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movb 8(%ebp), %al
+; X86-NEXT: movzbl 8(%ebp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf]
; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1}
define <8 x double> @test_mm512_mask_max_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_max_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_max_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_max_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_max_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_max_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_max_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_max_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_min_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_min_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_min_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_min_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_min_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_mask_min_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_min_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X86-LABEL: test_mm512_maskz_min_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_sqrt_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_sqrt_pd(i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x double> @test_mm512_mask_sqrt_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd {rn-sae}, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x double> @test_mm512_maskz_sqrt_round_pd(i8 zeroext %__U, <8 x double> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_round_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd {rn-sae}, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_rol_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_rol_epi64(i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_rolv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_rolv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_ror_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_ror_epi64(i8 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_rorv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_rorv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_move_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_move_ss:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_move_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_move_sd:
; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X86: ## %bb.0:
; X86-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86: ## %bb.0:
; X86-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
; X86-LABEL: fmadd_ss_mask_memfold:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) {
; X86-LABEL: fmadd_ss_maskz_memfold:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) {
; X86-LABEL: fmadd_sd_mask_memfold:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) {
; X86-LABEL: fmadd_sd_maskz_memfold:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
; X86-NEXT: ## xmm1 {%k1} = (xmm0 * mem) + xmm1
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
; X86-NEXT: ## xmm0 {%k1} = (xmm0 * mem) + xmm1
; X86-LABEL: test_mask_store_ss:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vmovss %xmm0, (%eax) {%k1}
; X86-NEXT: retl
;
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %xmm0, %xmm3
; X86-NEXT: vfmadd213sd {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2
;
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %xmm0, %xmm3
; X86-NEXT: vfmadd213ss {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2
;
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %xmm0, %xmm3
; X86-NEXT: vfmadd213sd {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
;
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %xmm0, %xmm3
; X86-NEXT: vfmadd213ss {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_load0:
; X86: # %bb.0:
; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: vmovaps (%ecx), %xmm0
; X86-NEXT: kmovw %eax, %k1
;
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %xmm2, %xmm3
; X86-NEXT: vfmadd231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3
;
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %xmm2, %xmm3
; X86-NEXT: vfmadd231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3
;
; X86-LABEL: fmadd_ss_mask_memfold:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
;
; X86-LABEL: fmadd_ss_maskz_memfold:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
;
; X86-LABEL: fmadd_sd_mask_memfold:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
;
; X86-LABEL: fmadd_sd_maskz_memfold:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
;
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %xmm2, %xmm3
; X86-NEXT: vfmsub231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3
;
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %xmm2, %xmm3
; X86-NEXT: vfmsub231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3
;
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovapd %xmm2, %xmm3
; X86-NEXT: vfnmsub231sd {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
;
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %xmm2, %xmm3
; X86-NEXT: vfnmsub231ss {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vfmadd231ss {{.*#+}} xmm1 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vfmadd132ss {{.*#+}} xmm0 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT: retl
;
; CHECK32-LABEL: test_mm_mask_move_ss:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT: retl
;
; CHECK32-LABEL: test_mm_maskz_move_ss:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
;
; CHECK32-LABEL: test_mm_mask_move_sd:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT: retl
;
; CHECK32-LABEL: test_mm_maskz_move_sd:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_store_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_load_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_maskz_load_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_store_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_store_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_load_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_maskz_load_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_mask_load_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
; CHECK32-LABEL: test_mm_maskz_load_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
;
; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store:
; AVX512-ONLY: # %bb.0:
-; AVX512-ONLY-NEXT: movb (%rdi), %al
+; AVX512-ONLY-NEXT: movzbl (%rdi), %eax
; AVX512-ONLY-NEXT: andl $1, %eax
; AVX512-ONLY-NEXT: kmovw %eax, %k0
; AVX512-ONLY-NEXT: kmovw %k0, %eax
;
; X86-LABEL: mask8:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notb %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
;
; X86-LABEL: mask8_zext:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notb %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: retl
;
; X86-LABEL: shuf_test1:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; X86-LABEL: store_i8_i1:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $1, %cl
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: retl
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k2
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k3
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $8, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw %k1, %k4
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k1
; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $2, %k1, %k1
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kandw %k7, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: kandw %k7, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $8, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $7, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $6, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $4, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $3, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $2, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT: kandw %k6, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k7, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $8, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $7, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; KNL-NEXT: kandw %k3, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $6, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $4, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $3, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT: kandw %k2, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $2, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: korw %k6, %k0, %k0
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT: kmovw %ecx, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $14, %k6, %k6
; KNL-NEXT: korw %k6, %k7, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $13, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $12, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $11, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $10, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $9, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $8, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT: kandw %k5, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $7, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kandw %k3, %k6, %k6
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $6, %k7, %k7
; KNL-NEXT: korw %k7, %k6, %k6
; KNL-NEXT: kandw %k4, %k6, %k5
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $5, %k6, %k6
; KNL-NEXT: korw %k6, %k5, %k5
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; KNL-NEXT: kandw %k3, %k5, %k4
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $4, %k5, %k5
; KNL-NEXT: korw %k5, %k4, %k4
; KNL-NEXT: kandw %k1, %k4, %k3
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $3, %k4, %k4
; KNL-NEXT: korw %k4, %k3, %k3
; KNL-NEXT: kandw %k2, %k3, %k2
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $2, %k3, %k3
; KNL-NEXT: korw %k3, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k2, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
-; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al
+; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k1, %k2
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k1, %k3
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %k1, %k4
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1
; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kandw %k5, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1
; AVX512DQ-NEXT: kmovw %eax, %k1
; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: andl $1, %eax
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: korw %k0, %k6, %k0
; AVX512DQ-NEXT: kandw %k7, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k7, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k2, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k2, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k3, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k3, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k4, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k4, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT: kandw %k5, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k6, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k6, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k6, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: andl $1, %eax
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0
; AVX512DQ-NEXT: korw %k0, %k6, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k6, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k7, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k2, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k3, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k3, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k4, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kandw %k5, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k2, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k0, %k0
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: andl $1, %eax
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; AVX512DQ-NEXT: kmovw %ecx, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k7, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k5, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kandw %k3, %k6, %k6
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k7
; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7
; AVX512DQ-NEXT: korw %k7, %k6, %k6
; AVX512DQ-NEXT: kandw %k4, %k6, %k5
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k6
; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6
; AVX512DQ-NEXT: korw %k6, %k5, %k5
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k3, %k5, %k4
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k5
; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5
; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5
; AVX512DQ-NEXT: korw %k5, %k4, %k4
; AVX512DQ-NEXT: kandw %k1, %k4, %k3
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k4
; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4
; AVX512DQ-NEXT: korw %k4, %k3, %k3
; AVX512DQ-NEXT: kandw %k2, %k3, %k2
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k3
; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3
; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3
; AVX512DQ-NEXT: korw %k3, %k2, %k2
; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT: kandw %k1, %k2, %k1
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2
; AVX512DQ-NEXT: korw %k2, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
-; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al
+; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; AVX512DQ-NEXT: kmovw %eax, %k2
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT: korw %k2, %k1, %k1
;
; X86-LABEL: test_v8i1_add:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
;
; X86-LABEL: test_v8i1_sub:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
;
; X86-LABEL: test_v8i1_mul:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i8 %x to <8 x i1>
;
; X86-LABEL: test_v1i1_add:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
;
; X86-LABEL: test_v1i1_sub:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
;
; X86-LABEL: test_v1i1_mul:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
%m0 = bitcast i1 %x to <1 x i1>
define i8 @select05(i8 %a.0, i8 %m) {
; X86-LABEL: select05:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
;
define i8 @select06(i8 %a.0, i8 %m) {
; X86-LABEL: select06:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
;
define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 {
; X86-LABEL: test_mm128_maskz_dpbf16ps_128:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
; X86-LABEL: test_mm128_mask_dpbf16ps_128:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kunpckdq %k1, %k0, %k1
; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
; X86-NEXT: retl
; X86: # %bb.0: # %entry
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kunpckdq %k1, %k0, %k1
; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z}
; X86-NEXT: retl
define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi16_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovd %k0, %eax
define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi16_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovd %k0, %eax
define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_set1_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastb %eax, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A) {
; X86-LABEL: test_mm_maskz_set1_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastb %eax, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){
; X86-LABEL: test_mm256_mask_set1_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastb %eax, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpbroadcastb %eax, %ymm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm_mask_set1_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpbroadcastw %eax, %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_set1_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpbroadcastw %eax, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastw_epi16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastw_epi16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovdqa %xmm1, %xmm0
define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86: # %bb.0:
; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: retl # encoding: [0xc3]
; X86: # %bb.0:
; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: retl # encoding: [0xc3]
define <8 x i64> @test_mm512_mask_madd52hi_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_mask_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_madd52hi_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) {
; X86-LABEL: test_mm512_maskz_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_madd52lo_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) {
; X86-LABEL: test_mm512_mask_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_madd52lo_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) {
; X86-LABEL: test_mm512_maskz_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52hi_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) {
; X86-LABEL: test_mm_mask_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) {
; X86-LABEL: test_mm_maskz_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) {
; X86-LABEL: test_mm256_mask_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) {
; X86-LABEL: test_mm256_maskz_madd52lo_epu64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compress_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_compress_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define void @test_mm_mask_compressstoreu_epi16(ptr %__P, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_compressstoreu_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1}
define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_mask_expand_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) {
; X86-LABEL: test_mm_maskz_expand_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm_mask_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_expandloadu_epi16:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovd %ecx, %k1
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldi_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldi_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdi_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdi_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shldv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shldv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shldv_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shldv_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shrdv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shrdv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_shrdv_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_shrdv_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovd %eax, %k1
; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_cvtepi32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepi32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtdq2ps %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_cvtepi32_ps(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepi32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_cvtepi32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtdq2ps %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_cvtepi32_ps(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_cvtpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2dq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvtpd_epi32(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_cvtpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2dq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm256_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_cvtpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2dq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvtpd_epi32(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_cvtpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2dq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <4 x float> @test_mm_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_cvtpd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2ps %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_cvtpd_ps(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_cvtpd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm256_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_cvtpd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2ps %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <4 x float> @test_mm256_maskz_cvtpd_ps(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_cvtpd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_cvtpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2udq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvtpd_epu32(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_cvtpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2udq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm256_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_cvtpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2udq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvtpd_epu32(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_cvtpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtpd2udq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <4 x float> @test_mm_mask_cvtph_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtph_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtph2ps %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtph_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_cvtph_ps(<8 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtph_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtph2ps %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtph_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_cvtps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_cvtps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2dq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvtps_epi32(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_cvtps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2dq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_cvtps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_cvtps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2dq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_cvtps_epi32(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_maskz_cvtps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2dq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_cvtps_pd(<2 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_maskz_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_cvtps_pd(<4 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm256_mask_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm256_maskz_cvtps_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_cvtps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_cvtps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2udq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvtps_epu32(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_cvtps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2udq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_cvtps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_cvtps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2udq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_cvtps_epu32(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_maskz_cvtps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtps2udq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_cvttpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2dq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvttpd_epi32(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_cvttpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2dq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm256_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_cvttpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvttpd_epi32(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_cvttpd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_cvttpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2udq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvttpd_epu32(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_cvttpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2udq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm256_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_cvttpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvttpd_epu32(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_cvttpd_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm_mask_cvttps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_cvttps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvttps_epi32(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_cvttps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_cvttps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_cvttps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_cvttps_epi32(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_maskz_cvttps_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_cvttps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_cvttps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_cvttps_epu32(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_cvttps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_cvttps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_cvttps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_cvttps_epu32(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_maskz_cvttps_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_cvtepu32_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_cvtepu32_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_maskz_cvtepu32_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_cvtepu32_pd(<4 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm256_mask_cvtepu32_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2pd %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm256_maskz_cvtepu32_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2pd %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_cvtepu32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepu32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2ps %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_cvtepu32_ps(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepu32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2ps %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_cvtepu32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepu32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2ps %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_cvtepu32_ps(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepu32_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcvtudq2ps %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_shuffle_f32x4(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; X86-LABEL: test_mm256_mask_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7]
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_shuffle_f32x4(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; X86-LABEL: test_mm256_maskz_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_shuffle_f64x2(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; X86-LABEL: test_mm256_mask_shuffle_f64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3]
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_shuffle_f64x2(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; X86-LABEL: test_mm256_maskz_shuffle_f64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shuffle_i32x4(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7]
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shuffle_i32x4(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_shuffle_i64x2(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_shuffle_i64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3]
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_shuffle_i64x2(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_shuffle_i64x2:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
; X86-NEXT: retl
define zeroext i8 @test_mm_mask_test_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm256_mask_test_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm_mask_test_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm256_mask_test_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm_mask_testn_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm256_mask_testn_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm_mask_testn_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define zeroext i8 @test_mm256_mask_testn_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi64_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
define <2 x i64> @test_mm_mask_set1_epi32(<2 x i64> %__O, i8 zeroext %__M) {
; X86-LABEL: test_mm_mask_set1_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_set1_epi32(i8 zeroext %__M) {
; X86-LABEL: test_mm_maskz_set1_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_set1_epi32(<4 x i64> %__O, i8 zeroext %__M) {
; X86-LABEL: test_mm256_mask_set1_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_set1_epi32(i8 zeroext %__M) {
; X86-LABEL: test_mm256_maskz_set1_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_set1_epi64(<2 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm_mask_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
define <2 x i64> @test_mm_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm_maskz_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: kmovw %eax, %k1
define <4 x i64> @test_mm256_mask_set1_epi64(<4 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm256_mask_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
define <4 x i64> @test_mm256_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: kmovw %eax, %k1
define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_broadcastd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_broadcastd_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_broadcastq_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_broadcastq_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm256_mask_broadcastq_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_broadcastq_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %__O, i8 zeroext %__M, <2 x double> %__A) {
; X86-LABEL: test_mm256_mask_broadcastsd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 zeroext %__M, <2 x double> %__A) {
; X86-LABEL: test_mm256_maskz_broadcastsd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %__O, i8 zeroext %__M, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_broadcastss_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_broadcastss_ps(i8 zeroext %__M, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_broadcastss_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x float> %a2) {
; X86-LABEL: test_mm256_mask_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_movedup_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_movedup_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_movedup_pd(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_movedup_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_movedup_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_movedup_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_movedup_pd(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_movedup_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT: retl
define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_movehdup_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_movehdup_ps(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_movehdup_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
; X86-LABEL: test_mm256_mask_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_movehdup_ps(i8 %a0, <8 x float> %a1) {
; X86-LABEL: test_mm256_maskz_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT: retl
define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_moveldup_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_moveldup_ps(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_moveldup_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
; X86-LABEL: test_mm256_mask_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_moveldup_ps(i8 %a0, <8 x float> %a1) {
; X86-LABEL: test_mm256_maskz_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X) {
; X86-LABEL: test_mm256_mask_permutex_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[3,0,0,0]
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 zeroext %__M, <4 x i64> %__X) {
; X86-LABEL: test_mm256_maskz_permutex_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__X) {
; X86-LABEL: test_mm256_mask_permutex_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_permutex_pd(i8 zeroext %__U, <4 x double> %__X) {
; X86-LABEL: test_mm256_maskz_permutex_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X86-NEXT: retl
define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_shuffle_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_shuffle_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_shuffle_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; X86-LABEL: test_mm256_mask_shuffle_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_shuffle_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; X86-LABEL: test_mm256_maskz_shuffle_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
; X86-NEXT: retl
define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_shuffle_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_shuffle_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_shuffle_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2, <8 x float> %a3) {
; X86-LABEL: test_mm256_mask_shuffle_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
; X86-LABEL: test_mm256_maskz_shuffle_ps:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_mul_epi32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
; X86-LABEL: test_mm256_mask_mul_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %ymm1, %ymm2, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_mul_epi32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
; X86-LABEL: test_mm256_maskz_mul_epi32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %ymm0, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_mul_epi32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
; X86-LABEL: test_mm_mask_mul_epi32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %xmm1, %xmm2, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_mul_epi32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
; X86-LABEL: test_mm_maskz_mul_epi32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuldq %xmm0, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_mul_epu32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
; X86-LABEL: test_mm256_mask_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %ymm1, %ymm2, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_mul_epu32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind {
; X86-LABEL: test_mm256_maskz_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %ymm0, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_mul_epu32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
; X86-LABEL: test_mm_mask_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %xmm1, %xmm2, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_mul_epu32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind {
; X86-LABEL: test_mm_maskz_mul_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi32_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdw %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi32_epi16:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi64_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqd %ymm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi64_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
; X86-LABEL: test_mm_mask_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X86-LABEL: test_mm_maskz_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
; X86-LABEL: test_mm256_mask_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X86-LABEL: test_mm256_maskz_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) {
; X86-LABEL: test_mm_mask_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) {
; X86-LABEL: test_mm_maskz_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) {
; X86-LABEL: test_mm256_mask_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) {
; X86-LABEL: test_mm256_maskz_ternarylogic_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask2_permutex2var_epi32(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovdqa %xmm1, %xmm0
define <4 x i64> @test_mm256_mask2_permutex2var_epi32(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT: vmovdqa %ymm1, %ymm0
define <2 x double> @test_mm_mask2_permutex2var_pd(<2 x double> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x double> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovapd %xmm1, %xmm0
define <4 x double> @test_mm256_mask2_permutex2var_pd(<4 x double> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x double> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT: vmovapd %ymm1, %ymm0
define <4 x float> @test_mm_mask2_permutex2var_ps(<4 x float> %__A, <2 x i64> %__I, i8 zeroext %__U, <4 x float> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovaps %xmm1, %xmm0
define <8 x float> @test_mm256_mask2_permutex2var_ps(<8 x float> %__A, <4 x i64> %__I, i8 zeroext %__U, <8 x float> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT: vmovaps %ymm1, %ymm0
define <2 x i64> @test_mm_mask2_permutex2var_epi64(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT: vmovdqa %xmm1, %xmm0
define <4 x i64> @test_mm256_mask2_permutex2var_epi64(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT: vmovdqa %ymm1, %ymm0
define <2 x i64> @test_mm_mask_permutex2var_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_permutex2var_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_permutex2var_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_permutex2var_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_permutex2var_pd(<2 x double> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_permutex2var_pd(i8 zeroext %__U, <2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_permutex2var_pd(<4 x double> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x double> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_permutex2var_pd(i8 zeroext %__U, <4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_permutex2var_ps(<4 x float> %__A, i8 zeroext %__U, <2 x i64> %__I, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_permutex2var_ps(i8 zeroext %__U, <4 x float> %__A, <2 x i64> %__I, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_permutex2var_ps(<8 x float> %__A, i8 zeroext %__U, <4 x i64> %__I, <8 x float> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_permutex2var_ps(i8 zeroext %__U, <8 x float> %__A, <4 x i64> %__I, <8 x float> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_permutex2var_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_permutex2var_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_permutex2var_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_permutex2var_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_mask3_fnmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_maskz_fmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fnmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_fmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_fmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask3_fmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_mm256_mask3_fnmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_mm256_maskz_fmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_fmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_fnmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_fnmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_mask3_fnmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_maskz_fmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fnmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_fmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_fmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask3_fmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_mm256_mask3_fnmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_mm256_maskz_fmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_fmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_fnmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_fnmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmaddsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fmsubadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fmaddsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <2 x double> @test_mm_maskz_fmaddsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_fmsubadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_fmaddsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_fmsubadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask3_fmaddsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x double> @test_mm256_maskz_fmaddsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fmaddsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_fmsubadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_maskz_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmaddsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fmsubadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fmaddsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <4 x float> @test_mm_maskz_fmaddsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_fmsubadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_fmaddsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_fmsubadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask3_fmaddsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
define <8 x float> @test_mm256_maskz_fmaddsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_fmsubadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_maskz_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <4 x double> @test_mm256_mask3_fmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x float> @test_mm_mask3_fmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <8 x float> @test_mm256_mask3_fmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
define <2 x double> @test_mm_mask3_fmsubadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <4 x double> @test_mm256_mask3_fmsubadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmsubadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x float> @test_mm_mask3_fmsubadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <8 x float> @test_mm256_mask3_fmsubadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
define <2 x double> @test_mm_mask_fnmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_fnmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fnmadd_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask_fnmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_fnmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask_fnmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_mask_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: retl
define <2 x double> @test_mm_mask3_fnmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0
define <4 x double> @test_mm256_mask_fnmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) {
; X86-LABEL: test_mm256_mask_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT: retl
define <4 x double> @test_mm256_mask3_fnmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fnmsub_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0
define <4 x float> @test_mm_mask_fnmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_mask_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: retl
define <4 x float> @test_mm_mask3_fnmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0
define <8 x float> @test_mm256_mask_fnmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) {
; X86-LABEL: test_mm256_mask_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT: retl
define <8 x float> @test_mm256_mask3_fnmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm256_mask3_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0
; X86-LABEL: test_mm_mask_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm256_mask_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm256_maskz_expandloadu_pd:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm_mask_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm256_mask_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm256_maskz_expandloadu_epi64:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm_mask_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm256_mask_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm256_maskz_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm_mask_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm_maskz_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z}
; X86-NEXT: retl
; X86-LABEL: test_mm256_mask_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1}
; X86-NEXT: retl
; X86-LABEL: test_mm256_maskz_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z}
; X86-NEXT: retl
define void @test_mm_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcompresspd %xmm0, (%ecx) {%k1}
define void @test_mm256_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcompresspd %ymm0, (%ecx) {%k1}
define void @test_mm_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpcompressq %xmm0, (%ecx) {%k1}
define void @test_mm256_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpcompressq %ymm0, (%ecx) {%k1}
define void @test_mm_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcompressps %xmm0, (%ecx) {%k1}
define void @test_mm256_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vcompressps %ymm0, (%ecx) {%k1}
define void @test_mm_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_compressstoreu_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpcompressd %xmm0, (%ecx) {%k1}
define void @test_mm256_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_compressstoreu_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpcompressd %ymm0, (%ecx) {%k1}
define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_maskz_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_maskz_sqrt_pd:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_maskz_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_maskz_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rol_epi32(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rol_epi32(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rol_epi64(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rol_epi64(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_rol_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolq $5, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rolv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rolv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rolv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rolv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rolv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rolv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rolv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rolv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_rolv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_ror_epi32(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_ror_epi32(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_ror_epi64(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_ror_epi64(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_ror_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorq $5, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rorv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rorv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rorv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rorv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
define <2 x i64> @test_mm_mask_rorv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
define <2 x i64> @test_mm_maskz_rorv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
define <4 x i64> @test_mm256_mask_rorv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT: retl
define <4 x i64> @test_mm256_maskz_rorv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_rorv_epi64:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z}
; X86-NEXT: retl
; X86: # %bb.0:
; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86: # %bb.0:
; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: retq
;
define i8 @test_bitreverse_i8(i8 %a) {
; X86-LABEL: test_bitreverse_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb $4, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $51, %cl
define i4 @test_bitreverse_i4(i4 %a) {
; X86-LABEL: test_bitreverse_i4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andb $15, %al
; X86-NEXT: movl %ecx, %edx
define i8 @identity_i8(i8 %a) {
; X86-LABEL: identity_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: identity_i8:
;
; X86XOP-LABEL: identity_i8:
; X86XOP: # %bb.0:
-; X86XOP-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86XOP-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86XOP-NEXT: retl
%b = call i8 @llvm.bitreverse.i8(i8 %a)
%c = call i8 @llvm.bitreverse.i8(i8 %b)
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: notb %al
; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
; X86-NEXT: sete %al
;
; X32-LABEL: sub_zext_cmp_mask_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: orb $46, %al
; X32-NEXT: retl
;
; X32-LABEL: add_zext_cmp_mask_same_size_result:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $27, %al
; X32-NEXT: retl
;
; X32-LABEL: add_zext_cmp_mask_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $43, %al
; X32-NEXT: retl
;
; X32-LABEL: low_bit_select_constants_bigger_true_same_size_result:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $-29, %al
; X32-NEXT: retl
;
; X32-LABEL: low_bit_select_constants_bigger_true_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $41, %al
; X32-NEXT: retl
define i32 @PR15215_bad(<4 x i32> %input) {
; X86-LABEL: PR15215_bad:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: addb %ah, %ah
; X86-NEXT: andb $1, %cl
define i32 @test1(i32 %a, i32 %b) nounwind ssp {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorb {{[0-9]+}}(%esp), %al
; CHECK-NEXT: testb $64, %al
; CHECK-NEXT: je LBB0_1
define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
; X86-LABEL: demanded_with_known_zeroes:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $2, %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl %al, %eax
; X86-LABEL: btr_16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btrw %cx, %ax
; X86-NEXT: retl
%1 = shl i16 1, %n
;
; X86-LABEL: bts_16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: orw {{[0-9]+}}(%esp), %ax
;
; X86-LABEL: btc_16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax
; X86-LABEL: btr_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btrl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 1, %n
; X86-LABEL: bts_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btsl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 1, %n
; X86-LABEL: btc_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btcl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 1, %n
;
; X86-LABEL: btr_64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
;
; X86-LABEL: bts_64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
;
; X86-LABEL: btc_64:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X86-LABEL: btr_16_mask:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btrw %cx, %ax
; X86-NEXT: retl
%1 = and i16 %n, 15
;
; X86-LABEL: bts_16_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
;
; X86-LABEL: btc_16_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-LABEL: btr_32_mask:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btrl %ecx, %eax
; X86-NEXT: retl
%1 = and i32 %n, 31
; X86-LABEL: bts_32_mask:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btsl %ecx, %eax
; X86-NEXT: retl
%1 = and i32 %n, 31
; X86-LABEL: btc_32_mask:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: btcl %ecx, %eax
; X86-NEXT: retl
%1 = and i32 %n, 31
;
; X86-LABEL: btr_64_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
;
; X86-LABEL: bts_64_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
;
; X86-LABEL: btc_64_mask:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
;
; X86-LABEL: btr_16_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: btrw %cx, %ax
; X86-LABEL: bts_16_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: orw (%edx), %ax
; X86-LABEL: btc_16_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: xorw (%edx), %ax
;
; X86-LABEL: btr_32_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: btrl %ecx, %eax
;
; X86-LABEL: bts_32_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: btsl %ecx, %eax
;
; X86-LABEL: btc_32_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: btcl %ecx, %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
; X86-LABEL: btr_16_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movw $-2, %dx
; X86-NEXT: rolw %cl, %dx
; X86-NEXT: andw %dx, (%eax)
; X86-LABEL: bts_16_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: orw %dx, (%eax)
; X86-LABEL: btc_16_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: xorw %dx, (%eax)
; X86-LABEL: btr_32_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2, %edx
; X86-NEXT: roll %cl, %edx
; X86-NEXT: andl %edx, (%eax)
; X86-LABEL: bts_32_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: orl %edx, (%eax)
; X86-LABEL: btc_32_dont_fold:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: xorl %edx, (%eax)
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %edx
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: shldl %cl, %edx, %esi
;
; X86-LABEL: btr_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $2, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btrl %ecx, %eax
;
; X86-LABEL: bts_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $2, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btsl %ecx, %eax
;
; X86-LABEL: btc_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $2, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btcl %ecx, %eax
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: movq %rbx, %rsi
; X64-NEXT: rep;movsq (%rsi), %es:(%rdi)
-; X64-NEXT: movb {{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movb %al, {{[0-9]+}}(%rsp)
; X64-NEXT: callq f@PLT
; X64-NEXT: movl $16, %ecx
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: movq %rbx, %rsi
; X64-NEXT: rep;movsq (%rsi), %es:(%rdi)
-; X64-NEXT: movb {{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movb %al, {{[0-9]+}}(%rsp)
; X64-NEXT: callq f@PLT
; X64-NEXT: addq $272, %rsp # imm = 0x110
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $272, %esp # imm = 0x110
-; X86-NEXT: movb 28(%ebp), %al
-; X86-NEXT: movb 24(%ebp), %cl
-; X86-NEXT: movb 20(%ebp), %dl
+; X86-NEXT: movzbl 28(%ebp), %eax
+; X86-NEXT: movzbl 24(%ebp), %ecx
+; X86-NEXT: movzbl 20(%ebp), %edx
; X86-NEXT: movb 16(%ebp), %ah
; X86-NEXT: movb 12(%ebp), %ch
; X86-NEXT: movb 8(%ebp), %dh
; X86-NEXT: movl %esp, %edi
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: rep;movsl (%esi), %es:(%edi)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: calll f@PLT
; X86-NEXT: movl $32, %ecx
; X86-NEXT: movl %esp, %edi
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: rep;movsl (%esi), %es:(%edi)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb %al, {{[0-9]+}}(%esp)
; X86-NEXT: calll f@PLT
; X86-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movabsq $9223372036854771712, %rdx # imm = 0x7FFFFFFFFFFFF000
; CHECK-NEXT: andq %rax, %rdx
-; CHECK-NEXT: movb pgdir_shift(%rip), %al
+; CHECK-NEXT: movzbl pgdir_shift(%rip), %eax
; CHECK-NEXT: movq page_offset_base(%rip), %rcx
; CHECK-NEXT: shrxq %rax, %rdi, %rax
; CHECK-NEXT: addq %rcx, %rdx
define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind {
; X86-LABEL: clear_highbits8_c0:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: retl
define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind {
; X86-LABEL: clear_highbits8_c2_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: retl
; X64-LABEL: clear_highbits8_c2_load:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: shlb %cl, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrb %cl, %al
define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind {
; X86-LABEL: clear_highbits8_c4_commutative:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: retl
define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits16_c0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movzwl %ax, %eax
;
; X86-BMI2-LABEL: clear_highbits16_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl %cx, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movzwl %ax, %eax
;
; X86-BMI2-LABEL: clear_highbits16_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl %cx, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits16_c2_load:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_highbits16_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx
define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx
define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movzwl %ax, %eax
;
; X86-BMI2-LABEL: clear_highbits16_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl %cx, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_c0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
;
; X86-BMI2-LABEL: clear_highbits32_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl %eax, %ecx
; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
;
; X86-BMI2-LABEL: clear_highbits32_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl %eax, %ecx
; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_c2_load:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_highbits32_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $32, %edx
; X86-BMI2-NEXT: subl %ecx, %edx
; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $32, %edx
; X86-BMI2-NEXT: subl %ecx, %edx
; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
;
; X86-BMI2-LABEL: clear_highbits32_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl %eax, %ecx
; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
; X86-BASELINE-LABEL: clear_highbits64_c0:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %esi
; X86-BASELINE-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: clear_highbits64_c0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI2-LABEL: clear_highbits64_c0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %esi
; X86-BASELINE-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: clear_highbits64_c1_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI2-LABEL: clear_highbits64_c1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BASELINE-NEXT: pushl %edi
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %edi
; X86-BASELINE-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BASELINE-NEXT: pushl %edi
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %edi
; X86-BASELINE-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BASELINE-LABEL: clear_highbits64_c4_commutative:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %esi
; X86-BASELINE-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: clear_highbits64_c4_commutative:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-NOBMI2-LABEL: oneuse32_c:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-BMI2-LABEL: oneuse32_c:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %edx
; X86-BASELINE-NEXT: shrl %cl, %edx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-NOBMI2-LABEL: oneuse32_d:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-BMI2-LABEL: oneuse32_d:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl %edx, (%eax)
; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax
; X86-BASELINE-NEXT: pushl %ebx
; X86-BASELINE-NEXT: pushl %edi
; X86-BASELINE-NEXT: pushl %esi
-; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BASELINE-NEXT: movl %edx, %edi
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shldl %cl, %eax, %esi
define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c0:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c2_load:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
; X64-LABEL: clear_lowbits8_c2_load:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: shrb %cl, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shlb %cl, %al
define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_c4_commutative:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: retl
define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_lowbits16_c0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_lowbits16_c1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c2_load:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits16_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx
define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzwl (%ecx), %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx
define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_lowbits16_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits32_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c2_load:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-BMI2-LABEL: clear_lowbits32_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits32_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits64_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %cl
define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %cl
define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic0:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-LABEL: clear_lowbits8_ic2_load:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
;
; X64-LABEL: clear_lowbits8_ic2_load:
; X64: # %bb.0:
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: movb $8, %cl
; X64-NEXT: subb %sil, %cl
; X64-NEXT: shrb %cl, %al
define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind {
; X86-LABEL: clear_lowbits8_ic4_commutative:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb $8, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shrb %cl, %al
; X86-NOBMI2-LABEL: oneuse32_c:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-BMI2-LABEL: oneuse32_c:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %eax, %edx, %eax
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-NOBMI2-NEXT: pushl %edi
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %edi
; X86-NOBMI2-NEXT: shll %cl, %edi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %esi
; X86-BMI2-NEXT: xorl %eax, %eax
define i8 @ctlz_i8_zero_test(i8 %n) {
; X86-LABEL: ctlz_i8_zero_test:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb %al, %al
; X86-NEXT: je .LBB8_1
; X86-NEXT: # %bb.2: # %cond.false
define i8 @cttz_i8_zero_test(i8 %n) {
; X86-LABEL: cttz_i8_zero_test:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testb %al, %al
; X86-NEXT: je .LBB12_1
; X86-NEXT: # %bb.2: # %cond.false
define i8 @cttz_i8_knownbits(i8 %x) {
; X86-LABEL: cttz_i8_knownbits:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orb $2, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsfl %eax, %eax
;
; X86-CLZ-LABEL: cttz_i8_knownbits:
; X86-CLZ: # %bb.0:
-; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orb $2, %al
; X86-CLZ-NEXT: movzbl %al, %eax
; X86-CLZ-NEXT: tzcntl %eax, %eax
define i8 @ctlz_i8_knownbits(i8 %x) {
; X86-LABEL: ctlz_i8_knownbits:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orb $64, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: bsrl %eax, %eax
;
; X86-CLZ-LABEL: ctlz_i8_knownbits:
; X86-CLZ: # %bb.0:
-; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-CLZ-NEXT: orb $64, %al
; X86-CLZ-NEXT: movzbl %al, %eax
; X86-CLZ-NEXT: lzcntl %eax, %eax
; CHECK-NEXT: xorb $1, %cl
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %edx
-; CHECK-NEXT: movb g_96(%rip), %al
+; CHECK-NEXT: movzbl g_96(%rip), %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_2
; CHECK-NEXT: # %bb.1: # %bb.i.i.i
-; CHECK-NEXT: movb g_100(%rip), %cl
+; CHECK-NEXT: movzbl g_100(%rip), %ecx
; CHECK-NEXT: .LBB3_2: # %func_4.exit.i
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne .LBB3_5
; CHECK-NEXT: # %bb.4: # %bb.i.i
-; CHECK-NEXT: movb g_100(%rip), %cl
+; CHECK-NEXT: movzbl g_100(%rip), %ecx
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: .LBB3_5: # %func_1.exit
; NOCMOV-NEXT: movb %al, g8
; NOCMOV-NEXT: retl
; NOCMOV-NEXT: .LBB7_1: # %entry
-; NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
+; NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; NOCMOV-NEXT: jg .LBB7_4
; NOCMOV-NEXT: .LBB7_3: # %entry
; NOCMOV-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl $5, %r8d
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shll %cl, %r8d
-; CHECK-NEXT: movb (%rsi,%rdx), %al
+; CHECK-NEXT: movzbl (%rsi,%rdx), %eax
; CHECK-NEXT: xorb (%rdi,%rdx), %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: andl %r8d, %eax
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb 4(%ecx), %cl
+; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: movb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: demand_one_loaded_byte:
; X64: # %bb.0:
-; X64-NEXT: movb 4(%rdi), %al
+; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movb %al, (%rsi)
; X64-NEXT: retq
%x = load i64, ptr %xp, align 8
define i8 @test_i8_7_mask_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_7_mask_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $6, %al
; X86-NEXT: shrb %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_28_mask_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shrb %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_lshr_2(i8 %a0) {
; X86-LABEL: test_i8_28_mask_lshr_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_lshr_3(i8 %a0) {
; X86-LABEL: test_i8_28_mask_lshr_3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $24, %al
; X86-NEXT: shrb $3, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_lshr_4(i8 %a0) {
; X86-LABEL: test_i8_28_mask_lshr_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $16, %al
; X86-NEXT: shrb $4, %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_224_mask_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $-32, %al
; X86-NEXT: shrb %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_lshr_4(i8 %a0) {
; X86-LABEL: test_i8_224_mask_lshr_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $-32, %al
; X86-NEXT: shrb $4, %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_lshr_5(i8 %a0) {
; X86-LABEL: test_i8_224_mask_lshr_5:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb $5, %al
; X86-NEXT: retl
;
define i8 @test_i8_224_mask_lshr_6(i8 %a0) {
; X86-LABEL: test_i8_224_mask_lshr_6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb $6, %al
; X86-NEXT: retl
;
define i8 @test_i8_7_mask_ashr_1(i8 %a0) {
; X86-LABEL: test_i8_7_mask_ashr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $6, %al
; X86-NEXT: shrb %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_ashr_1(i8 %a0) {
; X86-LABEL: test_i8_28_mask_ashr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shrb %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_ashr_2(i8 %a0) {
; X86-LABEL: test_i8_28_mask_ashr_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_ashr_3(i8 %a0) {
; X86-LABEL: test_i8_28_mask_ashr_3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $24, %al
; X86-NEXT: shrb $3, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_ashr_4(i8 %a0) {
; X86-LABEL: test_i8_28_mask_ashr_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $16, %al
; X86-NEXT: shrb $4, %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_ashr_1(i8 %a0) {
; X86-LABEL: test_i8_224_mask_ashr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $-32, %al
; X86-NEXT: sarb %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_ashr_4(i8 %a0) {
; X86-LABEL: test_i8_224_mask_ashr_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $-32, %al
; X86-NEXT: sarb $4, %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_ashr_5(i8 %a0) {
; X86-LABEL: test_i8_224_mask_ashr_5:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarb $5, %al
; X86-NEXT: retl
;
define i8 @test_i8_224_mask_ashr_6(i8 %a0) {
; X86-LABEL: test_i8_224_mask_ashr_6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarb $6, %al
; X86-NEXT: retl
;
define i8 @test_i8_7_mask_shl_1(i8 %a0) {
; X86-LABEL: test_i8_7_mask_shl_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $7, %al
; X86-NEXT: addb %al, %al
; X86-NEXT: retl
define i8 @test_i8_7_mask_shl_4(i8 %a0) {
; X86-LABEL: test_i8_7_mask_shl_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $7, %al
; X86-NEXT: shlb $4, %al
; X86-NEXT: retl
define i8 @test_i8_7_mask_shl_5(i8 %a0) {
; X86-LABEL: test_i8_7_mask_shl_5:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $5, %al
; X86-NEXT: retl
;
define i8 @test_i8_7_mask_shl_6(i8 %a0) {
; X86-LABEL: test_i8_7_mask_shl_6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $6, %al
; X86-NEXT: retl
;
define i8 @test_i8_28_mask_shl_1(i8 %a0) {
; X86-LABEL: test_i8_28_mask_shl_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: addb %al, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_shl_2(i8 %a0) {
; X86-LABEL: test_i8_28_mask_shl_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shlb $2, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_shl_3(i8 %a0) {
; X86-LABEL: test_i8_28_mask_shl_3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $28, %al
; X86-NEXT: shlb $3, %al
; X86-NEXT: retl
define i8 @test_i8_28_mask_shl_4(i8 %a0) {
; X86-LABEL: test_i8_28_mask_shl_4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $12, %al
; X86-NEXT: shlb $4, %al
; X86-NEXT: retl
define i8 @test_i8_224_mask_shl_1(i8 %a0) {
; X86-LABEL: test_i8_224_mask_shl_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $96, %al
; X86-NEXT: addb %al, %al
; X86-NEXT: retl
define dso_local i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
-; X32-NEXT: movb b, %cl
+; X32-NEXT: movzbl b, %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rax
-; X64-NEXT: movb b(%rip), %cl
+; X64-NEXT: movzbl b(%rip), %ecx
; X64-NEXT: leal 1(%rcx), %eax
; X64-NEXT: movb %al, b(%rip)
; X64-NEXT: incl c(%rip)
; X64-NEXT: sete %dl
-; X64-NEXT: movb a(%rip), %sil
+; X64-NEXT: movzbl a(%rip), %esi
; X64-NEXT: leal 1(%rsi), %edi
; X64-NEXT: cmpb %cl, %sil
; X64-NEXT: sete d(%rip)
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cltd
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorl %edx, %edx
define i8 @test8(i8 %x) nounwind {
; X32-LABEL: test8:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shrb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: imull $211, %eax, %eax
define i8 @test9(i8 %x) nounwind {
; X32-LABEL: test9:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shrb $2, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: imull $71, %eax, %eax
define i8 @test_urem_noext_ah(i8 %x, i8 %y) {
; X32-LABEL: test_urem_noext_ah:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: divb %cl
; X32-NEXT: movzbl %ah, %eax
define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
; X32-LABEL: test_srem_noext_ah:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: idivb %cl
; X32-NEXT: movsbl %ah, %eax
; X86-LABEL: f13:
; X86: movl $__emutls_v.b1, (%esp)
; X86-NEXT: calll __emutls_get_address
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: addl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $1, %eax
;
; X86-BMI1-LABEL: bextr32_a0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_a0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a0_arithmetic:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: sarl %cl, %esi
; X86-NOBMI-NEXT: movl $1, %eax
;
; X86-BMI1-LABEL: bextr32_a0_arithmetic:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: sarl %cl, %edx
; X86-BMI1-NEXT: shll $8, %eax
;
; X86-BMI2-LABEL: bextr32_a0_arithmetic:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $1, %eax
;
; X86-BMI1-LABEL: bextr32_a1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_a1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: bextr32_a2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_a2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: bextr32_a3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_a3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_a4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $1, %eax
;
; X86-BMI1-LABEL: bextr32_a4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_a4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %eax, %ecx
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl %al, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %eax, %edx, %esi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl (%eax), %esi
; X86-BMI2-NEXT: movl 4(%eax), %eax
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl (%eax), %esi
; X86-BMI2-NEXT: movl 4(%eax), %eax
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %esi
; X86-BMI2-LABEL: bextr64_32_a0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_a1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %edx, %esi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_a2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %esi
; X86-BMI2-LABEL: bextr64_32_a3:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
; X86-NOBMI-LABEL: bextr32_b0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $-1, %eax
;
; X86-BMI1-LABEL: bextr32_b0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_b0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_b1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $-1, %eax
;
; X86-BMI1-LABEL: bextr32_b1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_b1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_b2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: bextr32_b2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_b2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1-LABEL: bextr32_b3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_b3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-LABEL: bextr32_b4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: movl $-1, %eax
;
; X86-BMI1-LABEL: bextr32_b4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_b4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %eax, %ecx
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl %al, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %eax, %edx, %esi
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl (%edx), %esi
; X86-BMI1-NEXT: movl 4(%edx), %edi
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl (%edx), %eax
; X86-BMI2-NEXT: movl 4(%edx), %esi
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl (%edx), %esi
; X86-BMI1-NEXT: movl 4(%edx), %edi
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl (%edx), %eax
; X86-BMI2-NEXT: movl 4(%edx), %esi
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_b0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_b1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_b2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_b3:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-BMI2-NEXT: negb %al
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-BMI2-NEXT: negb %al
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-BMI2-NEXT: negb %al
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-BMI2-NEXT: negb %al
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-BMI2-NEXT: negb %al
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $16, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl %eax, %edi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl %eax, %edi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %esi
; X86-BMI1-NEXT: movl 4(%eax), %eax
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl (%eax), %esi
; X86-BMI2-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %esi
; X86-NOBMI-NEXT: movl 4(%eax), %eax
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %esi
; X86-BMI1-NEXT: movl 4(%eax), %eax
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl (%eax), %esi
; X86-BMI2-NEXT: movl 4(%eax), %eax
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $12, %esp
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $12, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl %eax, %edi
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $12, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %esi
; X86-NOBMI-LABEL: bextr64_32_c0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-BMI1-LABEL: bextr64_32_c0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %esi, %edx
;
; X86-BMI2-LABEL: bextr64_32_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
; X86-NOBMI-LABEL: bextr64_32_c1:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_c1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI-LABEL: bextr64_32_c2:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_c2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
; X86-NOBMI-LABEL: bextr64_32_c3:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-BMI1-LABEL: bextr64_32_c3:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %esi, %edx
; X86-BMI2-LABEL: bextr64_32_c3:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrdl %cl, %eax, %edx
define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr32_d0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
;
; X86-BMI1-LABEL: bextr32_d0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_d0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr32_d1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
;
; X86-BMI1-LABEL: bextr32_d1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: orl %eax, %ecx
;
; X86-BMI2-LABEL: bextr32_d1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr32_d2_load:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-BMI1-LABEL: bextr32_d2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_d2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bextr32_d3_load_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-BMI1-LABEL: bextr32_d3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: orl %ecx, %edx
;
; X86-BMI2-LABEL: bextr32_d3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: movzbl %al, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %eax, %edx, %esi
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edi
; X86-NOBMI-NEXT: movl 4(%eax), %edx
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %edi
; X86-BMI1-NEXT: movl 4(%eax), %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl (%edx), %eax
; X86-BMI2-NEXT: movl 4(%edx), %edx
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edi
; X86-NOBMI-NEXT: movl 4(%eax), %edx
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl (%eax), %edi
; X86-BMI1-NEXT: movl 4(%eax), %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl (%edx), %eax
; X86-BMI2-NEXT: movl 4(%edx), %edx
; X86-NOBMI-LABEL: bextr64_32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-BMI1-LABEL: bextr64_32_d0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %esi, %eax
;
; X86-BMI2-LABEL: bextr64_32_d0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
; X86-NOBMI-LABEL: bextr64_32_d1:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1-NEXT: movl %edi, %edx
; X86-BMI2-LABEL: bextr64_32_d1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shrdl %cl, %esi, %edx
define i8 @extractelt_bitcast(i32 %x) nounwind {
; X86-LABEL: extractelt_bitcast:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: extractelt_bitcast:
define i8 @trunc_i32_to_i8_le(i32 %x) {
; X86-LABEL: trunc_i32_to_i8_le:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: trunc_i32_to_i8_le:
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
;
; X86-BMI1-LABEL: bzhi32_a0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_a0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
;
; X86-BMI1-LABEL: bzhi32_a1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_a1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X86-NOBMI-LABEL: bzhi32_a2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
; X86-BMI1-LABEL: bzhi32_a2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_a2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a4_commutative:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
;
; X86-BMI1-LABEL: bzhi32_a4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_a4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI1-LABEL: bzhi64_a0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI2-LABEL: bzhi64_a0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI1-LABEL: bzhi64_a1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI2-LABEL: bzhi64_a1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: shldl %cl, %eax, %edx
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: shldl %cl, %eax, %edx
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a4_commutative:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI1-LABEL: bzhi64_a4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: shldl %cl, %eax, %edx
;
; X86-BMI2-LABEL: bzhi64_a4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
;
; X86-BMI1-LABEL: bzhi64_32_a0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %edx
; X86-BMI1-NEXT: shll %cl, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
;
; X86-BMI2-LABEL: bzhi64_32_a0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: jne .LBB10_2
define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a1:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
;
; X86-BMI1-LABEL: bzhi64_32_a1:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a1:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %edx, (%eax)
;
; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %ecx, (%edx)
;
; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl %ecx, (%edx)
define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a2:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
;
; X86-BMI1-LABEL: bzhi64_32_a2:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a2:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a3:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
;
; X86-BMI1-LABEL: bzhi64_32_a3:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %edx
; X86-BMI1-NEXT: shll %cl, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
;
; X86-BMI2-LABEL: bzhi64_32_a3:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: jne .LBB14_2
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
;
; X86-BMI1-LABEL: bzhi32_b0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
;
; X86-BMI1-LABEL: bzhi32_b1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X86-NOBMI-LABEL: bzhi32_b2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-BMI1-LABEL: bzhi32_b2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_b2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b4_commutative:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
;
; X86-BMI1-LABEL: bzhi32_b4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
;
; X86-BMI1-LABEL: bzhi64_b0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: bzhi64_b0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
;
; X86-BMI1-LABEL: bzhi64_b1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
; X86-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
;
; X86-BMI1-LABEL: bzhi64_b4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: bzhi64_b4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
;
; X86-BMI1-LABEL: bzhi64_32_b0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
;
; X86-BMI2-LABEL: bzhi64_32_b0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: jne .LBB25_2
define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b1:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
;
; X86-BMI1-LABEL: bzhi64_32_b1:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_b1:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b2:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
;
; X86-BMI1-LABEL: bzhi64_32_b2:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_b2:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b3:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
;
; X86-BMI1-LABEL: bzhi64_32_b3:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
;
; X86-BMI2-LABEL: bzhi64_32_b3:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: jne .LBB28_2
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
; X86-BMI2-NEXT: negb %dl
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
; X86-BMI2-NEXT: negb %dl
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
; X86-BMI2-NEXT: negb %dl
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
; X86-BMI2-NEXT: negb %dl
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
; X86-BMI2-NEXT: negb %dl
;
; X86-BMI1-LABEL: bzhi64_32_c1:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_c1:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
;
; X86-BMI1-LABEL: bzhi64_32_c2:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_c2:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
;
; X86-BMI1-LABEL: bzhi32_d0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
;
; X86-BMI1-LABEL: bzhi32_d1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_d2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
;
; X86-BMI1-LABEL: bzhi64_32_d1:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_d1:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; SSE: # %bb.0:
; SSE-NEXT: andl $15, %edi
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movb -24(%rsp,%rdi), %al
+; SSE-NEXT: movzbl -24(%rsp,%rdi), %eax
; SSE-NEXT: retq
;
; AVX-LABEL: extractelement_v16i8_var:
; AVX: # %bb.0:
; AVX-NEXT: andl $15, %edi
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movb -24(%rsp,%rdi), %al
+; AVX-NEXT: movzbl -24(%rsp,%rdi), %eax
; AVX-NEXT: retq
%b = extractelement <16 x i8> %a, i256 %i
ret i8 %b
; SSE-NEXT: andl $31, %edi
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movb -40(%rsp,%rdi), %al
+; SSE-NEXT: movzbl -40(%rsp,%rdi), %eax
; SSE-NEXT: retq
;
; AVX-LABEL: extractelement_v32i8_var:
; AVX-NEXT: subq $64, %rsp
; AVX-NEXT: andl $31, %edi
; AVX-NEXT: vmovaps %ymm0, (%rsp)
-; AVX-NEXT: movb (%rsp,%rdi), %al
+; AVX-NEXT: movzbl (%rsp,%rdi), %eax
; AVX-NEXT: movq %rbp, %rsp
; AVX-NEXT: popq %rbp
; AVX-NEXT: vzeroupper
; CHECK-LABEL: foo:
top:
%0 = load i8, ptr %arg
-; CHECK: movb
+; CHECK: movzbl
%1 = trunc i8 %0 to i1
; CHECK: andb $1,
%2 = call i64 @bar(i1 %1)
entry:
; clang uses i8 constants for booleans, so we test with an i8 1.
; CHECK-LABEL: test2:
-; CHECK: movb {{.*}} %al
+; CHECK: movzbl {{.*}} %eax
; CHECK-NEXT: xorb $1, %al
; CHECK-NEXT: testb $1
%tmp = load i8, ptr %a, align 1
define i8 @test1(i8 %x) nounwind {
; X32-LABEL: test1:
; X32: ## %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: negb %al
; X32-NEXT: retl
define i8 @test5(i8 %x) nounwind {
; X32-LABEL: test5:
; X32: ## %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: retl
;
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
-; X32-LABEL: test_movb:
-; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: retl
+; BWON32-LABEL: test_movb:
+; BWON32: # %bb.0:
+; BWON32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; BWON32-NEXT: retl
+;
+; BWOFF32-LABEL: test_movb:
+; BWOFF32: # %bb.0:
+; BWOFF32-NEXT: movb {{[0-9]+}}(%esp), %al
+; BWOFF32-NEXT: retl
ret i8 %a0
}
ret void
}
-; This test contains nothing but a simple byte load and store. Since
-; movb encodes smaller, we do not want to use movzbl unless in a tight loop.
-; So this test checks that movb is used.
+; This test contains nothing but a simple byte load and store.
+; movb encodes smaller, but we use movzbl for the load for better perf.
; CHECK-LABEL: foo3:
-; CHECK: movb
+; BWON: movzbl
+; BWOFF: movb
; CHECK: movb
define void @foo3(ptr%dst, ptr%src) {
%t0 = load i8, ptr%src, align 1
; CHECK-LABEL: t1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andq $-255, %rsi
-; CHECK-NEXT: movb (%rdi,%rsi,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: t2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andq $-14, %rsi
-; CHECK-NEXT: movb (%rdi,%rsi,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: t3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movb (%rdi,%rax,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: t4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andl $-2, %esi
-; CHECK-NEXT: movb (%rdi,%rsi,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
; CHECK-NEXT: retq
entry:
; CHECK-LABEL: t5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andl $-250002, %esi # imm = 0xFFFC2F6E
-; CHECK-NEXT: movb (%rdi,%rsi,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
; CHECK-NEXT: retq
entry:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
; CHECK-NEXT: andl $15, %esi
-; CHECK-NEXT: movb (%rdi,%rsi,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
; CHECK-NEXT: retq
entry:
%tmp2 = shl i32 %i, 2
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andl $-14, %ecx
-; CHECK-NEXT: movb (%eax,%ecx,4), %al
+; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax
; CHECK-NEXT: retl
entry:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-255, %ecx
; CHECK-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movb (%eax,%ecx,4), %al
+; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax
; CHECK-NEXT: retl
entry:
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X87-NEXT: fistps {{[0-9]+}}(%esp)
; X87-NEXT: fldcw {{[0-9]+}}(%esp)
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: addl $8, %esp
; X87-NEXT: .cfi_def_cfa_offset 4
; X87-NEXT: retl
;
; X86-LABEL: sitofp_i1tof16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
;
; X86-LABEL: uitofp_i1tof16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 8
-; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: andb $1, %al
; SSE-X86-NEXT: negb %al
; SSE-X86-NEXT: movsbl %al, %eax
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
-; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: negb %al
; AVX-X86-NEXT: movsbl %al, %eax
; X87: # %bb.0:
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: andb $1, %al
; X87-NEXT: negb %al
; X87-NEXT: movsbl %al, %eax
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %eax
; SSE-X86-NEXT: .cfi_def_cfa_offset 8
-; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT: andb $1, %al
; SSE-X86-NEXT: movzbl %al, %eax
; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
-; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: movzbl %al, %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; X87: # %bb.0:
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: andb $1, %al
; X87-NEXT: movzbl %al, %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $8, %esp
-; SSE-X86-NEXT: movb 8(%ebp), %al
+; SSE-X86-NEXT: movzbl 8(%ebp), %eax
; SSE-X86-NEXT: andb $1, %al
; SSE-X86-NEXT: movzbl %al, %eax
; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
-; AVX-X86-NEXT: movb 8(%ebp), %al
+; AVX-X86-NEXT: movzbl 8(%ebp), %eax
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: movzbl %al, %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; X87: # %bb.0:
; X87-NEXT: pushl %eax
; X87-NEXT: .cfi_def_cfa_offset 8
-; X87-NEXT: movb {{[0-9]+}}(%esp), %al
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X87-NEXT: andb $1, %al
; X87-NEXT: movzbl %al, %eax
; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
%result = call i1 @llvm.experimental.constrained.fptosi.i1.f80(x86_fp80 %x,
metadata !"fpexcept.strict") #0
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
%result = call i8 @llvm.experimental.constrained.fptosi.i8.f80(x86_fp80 %x,
metadata !"fpexcept.strict") #0
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
%result = call i1 @llvm.experimental.constrained.fptoui.i1.f80(x86_fp80 %x,
metadata !"fpexcept.strict") #0
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
%result = call i8 @llvm.experimental.constrained.fptoui.i8.f80(x86_fp80 %x,
metadata !"fpexcept.strict") #0
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB0_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB0_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB1_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB1_2:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB10_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB10_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB11_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB11_2:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB20_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB20_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB21_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB21_2:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB30_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB30_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB31_2
; X86-X87-NEXT: # %bb.1:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB31_2:
; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB0_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB0_3
; X86-X87-NEXT: .LBB0_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB1_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB1_3
; X86-X87-NEXT: .LBB1_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB10_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB10_3
; X86-X87-NEXT: .LBB10_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB11_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB11_3
; X86-X87-NEXT: .LBB11_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB20_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB20_3
; X86-X87-NEXT: .LBB20_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB21_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB21_3
; X86-X87-NEXT: .LBB21_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB30_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB30_3
; X86-X87-NEXT: .LBB30_1:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jb .LBB31_1
; X86-X87-NEXT: # %bb.2:
-; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: jmp .LBB31_3
; X86-X87-NEXT: .LBB31_1:
; X86-X87-NEXT: xorl %ecx, %ecx
define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: andb $15, %cl
; X86-FAST-NEXT: shldw %cl, %dx, %ax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: shll $16, %eax
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldl %cl, %edx, %eax
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: notb %cl
define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrb %cl
; X86-NEXT: shlb $7, %al
; X86-NEXT: orb %cl, %al
; X86-LABEL: combine_fshl_load_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb 1(%eax), %al
+; X86-NEXT: movzbl 1(%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: combine_fshl_load_i8:
; X64: # %bb.0:
-; X64-NEXT: movb 1(%rdi), %al
+; X64-NEXT: movzbl 1(%rdi), %eax
; X64-NEXT: retq
%p1 = getelementptr i8, ptr %p, i32 1
%ld0 = load i8, ptr%p
define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-FAST: # %bb.0:
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: andb $15, %cl
; X86-FAST-NEXT: shrdw %cl, %dx, %ax
; X86-FAST-NEXT: retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: shll $16, %eax
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shrdl %cl, %edx, %eax
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW: # %bb.0:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: shrl %cl, %edx
; X86-SLOW-NEXT: notb %cl
define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrb $7, %cl
; X86-NEXT: addb %al, %al
; X86-NEXT: orb %cl, %al
; X86-LABEL: combine_fshr_load_i8:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: retl
;
; X64-LABEL: combine_fshr_load_i8:
; X64: # %bb.0:
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: retq
%p1 = getelementptr i8, ptr %p, i32 1
%ld0 = load i8, ptr%p
define i8 @rotl_i8_const_shift(i8 %x) nounwind {
; X86-SSE2-LABEL: rotl_i8_const_shift:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rolb $3, %al
; X86-SSE2-NEXT: retl
;
define i8 @rotl_i8_const_shift1(i8 %x) nounwind {
; X86-SSE2-LABEL: rotl_i8_const_shift1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rolb %al
; X86-SSE2-NEXT: retl
;
define i8 @rotl_i8_const_shift7(i8 %x) nounwind {
; X86-SSE2-LABEL: rotl_i8_const_shift7:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rorb %al
; X86-SSE2-NEXT: retl
;
define i16 @rotl_i16(i16 %x, i16 %z) nounwind {
; X86-SSE2-LABEL: rotl_i16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rolw %cl, %ax
; X86-SSE2-NEXT: retl
define i32 @rotl_i32(i32 %x, i32 %z) nounwind {
; X86-SSE2-LABEL: rotl_i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: roll %cl, %eax
; X86-SSE2-NEXT: retl
define i8 @rotr_i8_const_shift(i8 %x) nounwind {
; X86-SSE2-LABEL: rotr_i8_const_shift:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rorb $3, %al
; X86-SSE2-NEXT: retl
;
define i8 @rotr_i8_const_shift1(i8 %x) nounwind {
; X86-SSE2-LABEL: rotr_i8_const_shift1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rorb %al
; X86-SSE2-NEXT: retl
;
define i8 @rotr_i8_const_shift7(i8 %x) nounwind {
; X86-SSE2-LABEL: rotr_i8_const_shift7:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rolb %al
; X86-SSE2-NEXT: retl
;
define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
; X86-SSE2-LABEL: rotr_i16:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: rorw %cl, %ax
; X86-SSE2-NEXT: retl
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshl_i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shldl %cl, %edx, %eax
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshr_i32:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shldl %cl, %eax, %eax
; X86-SSE2-NEXT: retl
define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shldl %cl, %eax, %eax
; X86-SSE2-NEXT: retl
; X86-SSE2-LABEL: fshl_i32_undef1_msk:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: andb $7, %cl
; X86-SSE2-NEXT: shll %cl, %eax
; X86-SSE2-NEXT: retl
define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shrdl %cl, %eax, %eax
; X86-SSE2-NEXT: retl
; X86-SSE2-LABEL: fshr_i32_undef0_msk:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: andb $7, %cl
; X86-SSE2-NEXT: shrl %cl, %eax
; X86-SSE2-NEXT: retl
define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shrdl %cl, %eax, %eax
; X86-SSE2-NEXT: retl
define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: shldl %cl, %edx, %eax
define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: xorl %edx, %edx
; X86-SSE2-NEXT: shldl %cl, %edx, %eax
define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: xorl %edx, %edx
; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: xorl %eax, %eax
; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %edx, %esi
; X86-SSE2-NEXT: shll %cl, %esi
; X86-SSE2-LABEL: or_shl_rotl:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shll %cl, %edx
; X86-SSE2-NEXT: roll %cl, %eax
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %edx, %esi
; X86-SSE2-NEXT: shll %cl, %esi
; X86-SSE2-LABEL: or_shl_rotl_commute:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shll %cl, %edx
; X86-SSE2-NEXT: roll %cl, %eax
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %edx, %esi
; X86-SSE2-NEXT: shrl %cl, %esi
; X86-SSE2-LABEL: or_lshr_rotr:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shrl %cl, %edx
; X86-SSE2-NEXT: rorl %cl, %eax
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl %edx, %esi
; X86-SSE2-NEXT: shrl %cl, %esi
; X86-SSE2-LABEL: or_lshr_rotr_commute:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: shrl %cl, %edx
; X86-SSE2-NEXT: rorl %cl, %eax
define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_simplify:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shldl %cl, %edx, %eax
define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_simplify:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
; X86-64-NEXT: testb $1, %dil
; X86-64-NEXT: je .LBB2_2
; X86-64-NEXT: # %bb.1: # %if
-; X86-64-NEXT: movb (%rdx), %al
+; X86-64-NEXT: movzbl (%rdx), %eax
; X86-64-NEXT: addb (%rcx), %al
; X86-64-NEXT: jmp .LBB2_3
; X86-64-NEXT: .LBB2_2: # %else
-; X86-64-NEXT: movb (%rcx), %al
+; X86-64-NEXT: movzbl (%rcx), %eax
; X86-64-NEXT: .LBB2_3: # %exit
; X86-64-NEXT: kmovd %eax, %k1
; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-32-NEXT: je .LBB2_2
; X86-32-NEXT: # %bb.1: # %if
; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-32-NEXT: movb (%edx), %dl
+; X86-32-NEXT: movzbl (%edx), %edx
; X86-32-NEXT: addb (%ecx), %dl
; X86-32-NEXT: jmp .LBB2_3
; X86-32-NEXT: .LBB2_2: # %else
-; X86-32-NEXT: movb (%ecx), %dl
+; X86-32-NEXT: movzbl (%ecx), %edx
; X86-32-NEXT: .LBB2_3: # %exit
; X86-32-NEXT: kmovd %edx, %k1
; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-64-NEXT: testb $1, %dil
; X86-64-NEXT: je .LBB4_2
; X86-64-NEXT: # %bb.1: # %if
-; X86-64-NEXT: movb (%rsi), %al
+; X86-64-NEXT: movzbl (%rsi), %eax
; X86-64-NEXT: jmp .LBB4_3
; X86-64-NEXT: .LBB4_2: # %else
-; X86-64-NEXT: movb (%rdx), %al
+; X86-64-NEXT: movzbl (%rdx), %eax
; X86-64-NEXT: .LBB4_3: # %exit
; X86-64-NEXT: andb $1, %al
; X86-64-NEXT: movb %al, (%rcx)
; X86-32-NEXT: .LBB4_2: # %else
; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT: .LBB4_3: # %exit
-; X86-32-NEXT: movb (%ecx), %cl
+; X86-32-NEXT: movzbl (%ecx), %ecx
; X86-32-NEXT: andb $1, %cl
; X86-32-NEXT: movb %cl, (%eax)
; X86-32-NEXT: retl
; X86-64-NEXT: testb $1, %dil
; X86-64-NEXT: je .LBB6_2
; X86-64-NEXT: # %bb.1: # %if
-; X86-64-NEXT: movb (%rsi), %al
+; X86-64-NEXT: movzbl (%rsi), %eax
; X86-64-NEXT: shrb %al
; X86-64-NEXT: jmp .LBB6_3
; X86-64-NEXT: .LBB6_2: # %else
-; X86-64-NEXT: movb (%rdx), %al
+; X86-64-NEXT: movzbl (%rdx), %eax
; X86-64-NEXT: .LBB6_3: # %exit
; X86-64-NEXT: kmovd %eax, %k1
; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT: je .LBB6_2
; X86-32-NEXT: # %bb.1: # %if
; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-32-NEXT: movb (%ecx), %cl
+; X86-32-NEXT: movzbl (%ecx), %ecx
; X86-32-NEXT: shrb %cl
; X86-32-NEXT: jmp .LBB6_3
; X86-32-NEXT: .LBB6_2: # %else
; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-32-NEXT: movb (%ecx), %cl
+; X86-32-NEXT: movzbl (%ecx), %ecx
; X86-32-NEXT: .LBB6_3: # %exit
; X86-32-NEXT: kmovd %ecx, %k1
; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-LABEL: foo1:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dh, %ecx
-; CHECK-NEXT: movb (%eax,%ecx), %al
+; CHECK-NEXT: movzbl (%eax,%ecx), %eax
; CHECK-NEXT: retl
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
; CHECK-LABEL: bar8:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dh, %ecx
-; CHECK-NEXT: movb (%eax,%ecx,8), %al
+; CHECK-NEXT: movzbl (%eax,%ecx,8), %eax
; CHECK-NEXT: retl
%t0 = lshr i32 %x, 5
%t1 = and i32 %t0, 2040
; CHECK-LABEL: bar4:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dh, %ecx
-; CHECK-NEXT: movb (%eax,%ecx,4), %al
+; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax
; CHECK-NEXT: retl
%t0 = lshr i32 %x, 6
%t1 = and i32 %t0, 1020
; CHECK-LABEL: bar2:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl %dh, %ecx
-; CHECK-NEXT: movb (%eax,%ecx,2), %al
+; CHECK-NEXT: movzbl (%eax,%ecx,2), %eax
; CHECK-NEXT: retl
%t0 = lshr i32 %x, 7
%t1 = and i32 %t0, 510
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movzbl (%rdi,%rax), %eax
; CHECK-NEXT: retq
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movb (%rdi,%rax,8), %al
+; CHECK-NEXT: movzbl (%rdi,%rax,8), %eax
; CHECK-NEXT: retq
%t0 = lshr i64 %x, 5
%t1 = and i64 %t0, 2040
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movb (%rdi,%rax,4), %al
+; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax
; CHECK-NEXT: retq
%t0 = lshr i64 %x, 6
%t1 = and i64 %t0, 1020
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: movzbl %ah, %eax
-; CHECK-NEXT: movb (%rdi,%rax,2), %al
+; CHECK-NEXT: movzbl (%rdi,%rax,2), %eax
; CHECK-NEXT: retq
%t0 = lshr i64 %x, 7
%t1 = and i64 %t0, 510
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_lowestbit_eq:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $1, %al
; X86-NEXT: sete %al
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $1, %al
;
; X86-BMI2-LABEL: scalar_i16_lowestbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
;
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $1, %al
;
; X86-BMI2-LABEL: scalar_i32_lowestbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testb $1, %al
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
; X86-BMI1-LABEL: scalar_i64_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %eax, %esi
;
; X86-BMI2-LABEL: scalar_i64_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
;
; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %eax, %esi
; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shldl %cl, %eax, %edx
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb %cl, %al
; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_x_is_const2_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl %eax, %eax
;
; X86-BMI2-LABEL: scalar_i32_x_is_const2_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $1, %ecx
; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testl %eax, %eax
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb $-128, %al
; X86-NEXT: shrb %cl, %al
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_signbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000
;
; X86-BMI2-LABEL: scalar_i32_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00
;
; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00
; X86-BMI2-NEXT: sete %al
define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
; X86-BMI1-LABEL: scalar_i64_signbit_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
;
; X86-BMI2-LABEL: scalar_i64_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %eax
; X86-BMI1-NEXT: xorl %esi, %esi
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: xorl %esi, %esi
; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %edx, %esi
; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: shrdl %cl, %edx, %eax
define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb %cl, %al
; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
; X86-BMI1-LABEL: scalar_i32_x_is_const_eq:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $1, %al
;
; X86-BMI2-LABEL: scalar_i32_x_is_const_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $-1437226411, %ecx # imm = 0xAA55AA55
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: testb $1, %al
define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb $24, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb $-128, %al
; X86-NEXT: shlb %cl, %al
; X86-NEXT: andb {{[0-9]+}}(%esp), %al
define i8 @test_i8(i8 %a) nounwind {
; X86-LABEL: test_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
; X86-NEXT: xorb %cl, %al
define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: incb %al
; X86-NEXT: retl
; SSE-LABEL: load_i8_v32i8_undef:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: andl $31, %esi
; SSE-NEXT: movb %al, -40(%rsp,%rsi)
; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; X86AVX2-NEXT: subl $32, %esp
; X86AVX2-NEXT: movl 12(%ebp), %eax
; X86AVX2-NEXT: andl $15, %eax
-; X86AVX2-NEXT: movb 8(%ebp), %cl
+; X86AVX2-NEXT: movzbl 8(%ebp), %ecx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
; X86AVX2-NEXT: movb %cl, (%esp,%eax)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; SSE-LABEL: load_i8_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $15, %esi
; SSE-NEXT: movb %al, -24(%rsp,%rsi)
; AVX1OR2-LABEL: load_i8_v16i8:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX1OR2-NEXT: movb (%rdi), %al
+; AVX1OR2-NEXT: movzbl (%rdi), %eax
; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT: andl $15, %esi
; AVX1OR2-NEXT: movb %al, -24(%rsp,%rsi)
; AVX512F-LABEL: load_i8_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512F-NEXT: movb (%rdi), %al
+; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: andl $15, %esi
; AVX512F-NEXT: movb %al, -24(%rsp,%rsi)
; X86AVX2-NEXT: movl 12(%ebp), %eax
; X86AVX2-NEXT: andl $15, %eax
; X86AVX2-NEXT: movl 8(%ebp), %ecx
-; X86AVX2-NEXT: movb (%ecx), %cl
+; X86AVX2-NEXT: movzbl (%ecx), %ecx
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
; X86AVX2-NEXT: movb %cl, (%esp,%eax)
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
; X86AVX2-NEXT: subl $64, %esp
; X86AVX2-NEXT: movl 12(%ebp), %eax
; X86AVX2-NEXT: andl $31, %eax
-; X86AVX2-NEXT: movb 8(%ebp), %cl
+; X86AVX2-NEXT: movzbl 8(%ebp), %ecx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
; X86AVX2-NEXT: movb %cl, (%esp,%eax)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; SSE-LABEL: load_i8_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $31, %esi
; AVX1OR2-NEXT: andq $-32, %rsp
; AVX1OR2-NEXT: subq $64, %rsp
; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX1OR2-NEXT: movb (%rdi), %al
+; AVX1OR2-NEXT: movzbl (%rdi), %eax
; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT: andl $31, %esi
; AVX1OR2-NEXT: movb %al, (%rsp,%rsi)
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi
-; AVX512F-NEXT: movb (%rdi), %al
+; AVX512F-NEXT: movzbl (%rdi), %eax
; AVX512F-NEXT: vmovaps %ymm0, (%rsp)
; AVX512F-NEXT: andl $31, %esi
; AVX512F-NEXT: movb %al, (%rsp,%rsi)
; X86AVX2-NEXT: movl 12(%ebp), %eax
; X86AVX2-NEXT: andl $31, %eax
; X86AVX2-NEXT: movl 8(%ebp), %ecx
-; X86AVX2-NEXT: movb (%ecx), %cl
+; X86AVX2-NEXT: movzbl (%ecx), %ecx
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
; X86AVX2-NEXT: movb %cl, (%esp,%eax)
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
; CHECK-NEXT: cmpb $0, 4(%eax)
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.2: # %F
-; CHECK-NEXT: movb 7(%eax), %al
+; CHECK-NEXT: movzbl 7(%eax), %eax
; CHECK-NEXT: retl
; CHECK-NEXT: .LBB0_1: # %TB
; CHECK-NEXT: movb $4, %al
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: shll %cl, %eax
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: shrdl %cl, %edx, %eax
; CHECK-NEXT: shrl %cl, %edx
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: andb $7, %cl
; CHECK-NEXT: shrdl %cl, %edx, %eax
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: .LBB0_3: # %_ZNSt3__312basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev.exit50
-; CHECK-NEXT: movb 16(%rax), %al
+; CHECK-NEXT: movzbl 16(%rax), %eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdi, %r14
-; CHECK-NEXT: movb (%rdx), %al
+; CHECK-NEXT: movzbl (%rdx), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb %cl
; CHECK-NEXT: andb $1, %cl
; CHECK: # %bb.0: # %Entry
; CHECK-NEXT: movl __unnamed_1(%rip), %eax
; CHECK-NEXT: movl %eax, -12(%rsp)
-; CHECK-NEXT: movb -9(%rsp), %cl
+; CHECK-NEXT: movzbl -9(%rsp), %ecx
; CHECK-NEXT: movzbl -10(%rsp), %edx
; CHECK-NEXT: movzbl -11(%rsp), %esi
; CHECK-NEXT: andl $31, %eax
define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
; SSE-LABEL: sub_op1_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: addb $-42, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
;
; AVX-LABEL: sub_op1_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
+; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: addb $-42, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op0_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %cl
+; SSE-NEXT: movzbl (%rdi), %ecx
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: shll %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
;
; AVX-LABEL: shl_op0_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %cl
+; AVX-NEXT: movzbl (%rdi), %ecx
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: shll %cl, %eax
; AVX-NEXT: vmovd %eax, %xmm0
define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op1_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: shlb $5, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
;
; AVX-LABEL: shl_op1_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
+; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: shlb $5, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vmovd %eax, %xmm0
define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: lshr_op0_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %cl
+; SSE-NEXT: movzbl (%rdi), %ecx
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: shrq %cl, %rax
; SSE-NEXT: movq %rax, %xmm0
;
; AVX-LABEL: lshr_op0_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %cl
+; AVX-NEXT: movzbl (%rdi), %ecx
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: shrq %cl, %rax
; AVX-NEXT: vmovq %rax, %xmm0
define <8 x i16> @ashr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: ashr_op0_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %cl
+; SSE-NEXT: movzbl (%rdi), %ecx
; SSE-NEXT: movl $-42, %eax
; SSE-NEXT: sarl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
;
; AVX-LABEL: ashr_op0_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %cl
+; AVX-NEXT: movzbl (%rdi), %ecx
; AVX-NEXT: movl $-42, %eax
; AVX-NEXT: sarl %cl, %eax
; AVX-NEXT: vmovd %eax, %xmm0
define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: urem_op1_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movl %eax, %ecx
; SSE-NEXT: shrb %cl
; SSE-NEXT: movzbl %cl, %ecx
;
; AVX-LABEL: urem_op1_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
+; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: shrb %cl
; AVX-NEXT: movzbl %cl, %ecx
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT: movw $-3, %ax
; KNL_32-NEXT: kmovw %eax, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: andl $1, %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k0, %k1, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $14, %k1, %k1
; KNL_32-NEXT: movw $-5, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k1, %k0, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $13, %k1, %k1
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $-3, %al
; SKX_32-NEXT: kmovw %eax, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $7, %k1, %k1
; SKX_32-NEXT: kandw %k0, %k1, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $6, %k1, %k1
; SKX_32-NEXT: movb $-5, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kandw %k1, %k0, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $5, %k1, %k1
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT: movw $-3, %ax
; KNL_32-NEXT: kmovw %eax, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: andl $1, %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k0, %k1, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $14, %k1, %k1
; KNL_32-NEXT: movw $-5, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k1, %k0, %k0
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $13, %k1, %k1
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $-3, %al
; SKX_32-NEXT: kmovw %eax, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $7, %k1, %k1
; SKX_32-NEXT: kandw %k0, %k1, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $6, %k1, %k1
; SKX_32-NEXT: movb $-5, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kandw %k1, %k0, %k0
-; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $5, %k1, %k1
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movb 2(%ecx), %cl
+; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: xorb 2(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orw %dx, %ax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movb 4(%ecx), %cl
+; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: xorb 4(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: xorl (%eax), %edx
; X86-NEXT: xorl 4(%eax), %esi
; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movb 8(%ecx), %cl
+; X86-NEXT: movzbl 8(%ecx), %ecx
; X86-NEXT: xorb 8(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl 8(%edx), %esi
; X86-NEXT: xorl 8(%ecx), %esi
-; X86-NEXT: movb 12(%edx), %dl
+; X86-NEXT: movzbl 12(%edx), %edx
; X86-NEXT: xorb 12(%ecx), %dl
; X86-NEXT: movzbl %dl, %ecx
; X86-NEXT: orl %esi, %ecx
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movb 2(%rdi), %cl
+; X64-NEXT: movzbl 2(%rdi), %ecx
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movb 4(%rdi), %cl
+; X64-NEXT: movzbl 4(%rdi), %ecx
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movb 8(%rdi), %cl
+; X64-NEXT: movzbl 8(%rdi), %ecx
; X64-NEXT: xorb 8(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orq %rax, %rcx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %edx
; X86-NEXT: xorw (%eax), %dx
-; X86-NEXT: movb 2(%ecx), %cl
+; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: xorb 2(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orw %dx, %ax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %edx
; X86-NEXT: xorl (%eax), %edx
-; X86-NEXT: movb 4(%ecx), %cl
+; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: xorb 4(%eax), %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: orl %edx, %eax
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: xorw (%rsi), %ax
-; X64-NEXT: movb 2(%rdi), %cl
+; X64-NEXT: movzbl 2(%rdi), %ecx
; X64-NEXT: xorb 2(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orw %ax, %cx
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: xorl (%rsi), %eax
-; X64-NEXT: movb 4(%rdi), %cl
+; X64-NEXT: movzbl 4(%rdi), %ecx
; X64-NEXT: xorb 4(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orl %eax, %ecx
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: xorq (%rsi), %rax
-; X64-NEXT: movb 8(%rdi), %cl
+; X64-NEXT: movzbl 8(%rdi), %ecx
; X64-NEXT: xorb 8(%rsi), %cl
; X64-NEXT: movzbl %cl, %ecx
; X64-NEXT: orq %rax, %rcx
; unaligned loads and stores.
; DARWIN-LABEL: PR15348:
; DARWIN: ## %bb.0:
-; DARWIN-NEXT: movb 16(%rsi), %al
+; DARWIN-NEXT: movzbl 16(%rsi), %eax
; DARWIN-NEXT: movb %al, 16(%rdi)
; DARWIN-NEXT: movq (%rsi), %rax
; DARWIN-NEXT: movq 8(%rsi), %rcx
;
; LINUX-LABEL: PR15348:
; LINUX: # %bb.0:
-; LINUX-NEXT: movb 16(%rsi), %al
+; LINUX-NEXT: movzbl 16(%rsi), %eax
; LINUX-NEXT: movb %al, 16(%rdi)
; LINUX-NEXT: movq (%rsi), %rax
; LINUX-NEXT: movq 8(%rsi), %rcx
;
; LINUX-SKL-LABEL: PR15348:
; LINUX-SKL: # %bb.0:
-; LINUX-SKL-NEXT: movb 16(%rsi), %al
+; LINUX-SKL-NEXT: movzbl 16(%rsi), %eax
; LINUX-SKL-NEXT: movb %al, 16(%rdi)
; LINUX-SKL-NEXT: vmovups (%rsi), %xmm0
; LINUX-SKL-NEXT: vmovups %xmm0, (%rdi)
;
; LINUX-SKX-LABEL: PR15348:
; LINUX-SKX: # %bb.0:
-; LINUX-SKX-NEXT: movb 16(%rsi), %al
+; LINUX-SKX-NEXT: movzbl 16(%rsi), %eax
; LINUX-SKX-NEXT: movb %al, 16(%rdi)
; LINUX-SKX-NEXT: vmovups (%rsi), %xmm0
; LINUX-SKX-NEXT: vmovups %xmm0, (%rdi)
;
; LINUX-KNL-LABEL: PR15348:
; LINUX-KNL: # %bb.0:
-; LINUX-KNL-NEXT: movb 16(%rsi), %al
+; LINUX-KNL-NEXT: movzbl 16(%rsi), %eax
; LINUX-KNL-NEXT: movb %al, 16(%rdi)
; LINUX-KNL-NEXT: vmovups (%rsi), %xmm0
; LINUX-KNL-NEXT: vmovups %xmm0, (%rdi)
;
; LINUX-AVX512BW-LABEL: PR15348:
; LINUX-AVX512BW: # %bb.0:
-; LINUX-AVX512BW-NEXT: movb 16(%rsi), %al
+; LINUX-AVX512BW-NEXT: movzbl 16(%rsi), %eax
; LINUX-AVX512BW-NEXT: movb %al, 16(%rdi)
; LINUX-AVX512BW-NEXT: vmovups (%rsi), %xmm0
; LINUX-AVX512BW-NEXT: vmovups %xmm0, (%rdi)
; X86-SSE1-NEXT: movl 3(%ecx), %esi
; X86-SSE1-NEXT: movl 7(%ecx), %edi
; X86-SSE1-NEXT: movzwl 11(%ecx), %ebx
-; X86-SSE1-NEXT: movb 13(%ecx), %dl
-; X86-SSE1-NEXT: movb 15(%ecx), %cl
+; X86-SSE1-NEXT: movzbl 13(%ecx), %edx
+; X86-SSE1-NEXT: movzbl 15(%ecx), %ecx
; X86-SSE1-NEXT: movb %dl, 13(%eax)
; X86-SSE1-NEXT: movb %cl, 15(%eax)
; X86-SSE1-NEXT: movw %bx, 11(%eax)
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movzwl (%ecx), %edx
-; X86-SSE1-NEXT: movb 3(%ecx), %cl
+; X86-SSE1-NEXT: movzbl 3(%ecx), %ecx
; X86-SSE1-NEXT: movb %cl, 3(%eax)
; X86-SSE1-NEXT: movw %dx, (%eax)
; X86-SSE1-NEXT: movb $0, 15(%eax)
; X86-LABEL: {{^}}merge_store_partial_overlap_load:
; X86-DAG: movzwl ([[BASEREG:%[a-z]+]]), %e[[LO2:[a-z]+]]
-; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]]
+; X86-DAG: movzbl 2([[BASEREG]]), %e[[HI1:[a-z]]]
; X86-NEXT: movw %[[LO2]], 1([[BASEREG]])
-; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
+; X86-NEXT: movb %[[HI1]]l, 3([[BASEREG]])
; X86-NEXT: retq
; DBGDAG-LABEL: Optimized legalized selection DAG: %bb.0 'merge_store_partial_overlap_load:'
;
; X86-LABEL: scalar_i8_signed_reg_reg:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setle %dl
; X86-NEXT: jg .LBB15_1
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setbe %dl
; X86-NEXT: ja .LBB16_1
;
; X86-LABEL: scalar_i8_signed_mem_reg:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb (%ecx), %cl
+; X86-NEXT: movzbl (%ecx), %ecx
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setle %dl
; X86-NEXT: jg .LBB17_1
;
; X86-LABEL: scalar_i8_signed_reg_mem:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setle %dl
; X86-NEXT: jg .LBB18_1
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb (%ecx), %cl
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%ecx), %ecx
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: cmpb %al, %cl
; X86-NEXT: setle %dl
; X86-NEXT: jg .LBB19_1
; CHECK-NEXT: .cfi_offset %esi, -16
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: movl f, %esi
-; CHECK-NEXT: movb (%esi), %al
+; CHECK-NEXT: movzbl (%esi), %eax
; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $15, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
%val = extractelement <16 x i1> %cmp, i32 %z
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $7, %edi
-; KNL-NEXT: movb -24(%rsp,%rdi,2), %al
+; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $7, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi,2), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT: retq
%cmp = icmp sgt <8 x i16> %x, %y
%val = extractelement <8 x i1> %cmp, i32 %z
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $3, %edi
-; KNL-NEXT: movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $3, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
%cmp = icmp slt <4 x i32> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $1, %edi
-; KNL-NEXT: movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
%cmp = icmp ne <2 x i64> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $3, %edi
-; KNL-NEXT: movb -24(%rsp,%rdi,4), %al
+; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $3, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi,4), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT: retq
%cmp = fcmp ueq <4 x float> %x, %y
%val = extractelement <4 x i1> %cmp, i32 %z
; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: andl $1, %edi
-; KNL-NEXT: movb -24(%rsp,%rdi,8), %al
+; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: movb -24(%rsp,%rdi,8), %al
+; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT: retq
%cmp = fcmp oge <2 x double> %x, %y
%val = extractelement <2 x i1> %cmp, i32 %z
; LINUX-NEXT: movq %rbp, %rdx
; LINUX-NEXT: movq %r13, %rcx
; LINUX-NEXT: movq %r12, %r8
-; LINUX-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; LINUX-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; LINUX-NEXT: movq %r15, %r9
; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; LINUX-X32-NEXT: movq %rbp, %rdx
; LINUX-X32-NEXT: movq %r13, %rcx
; LINUX-X32-NEXT: movq %r12, %r8
-; LINUX-X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; LINUX-X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; LINUX-X32-NEXT: movq %r15, %r9
; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
define i8 @neg_abs_i8(i8 %x) nounwind {
; X86-LABEL: neg_abs_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: sub_abs_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
; X86-NEXT: xorb %al, %cl
;
; X32-LABEL: select_i8_neg1_or_0:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb $1, %al
; X32-NEXT: negb %al
; X32-NEXT: retl
;
; X32-LABEL: select_i8_neg1_or_0_zeroext:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: negb %al
; X32-NEXT: retl
%b = sext i1 %a to i8
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movb %al, 6(%rdi)
; SSE2-NEXT: movd %xmm2, (%rdi)
; SSE2-NEXT: pextrw $2, %xmm2, %eax
define i8 @or_i8_ri(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: or_i8_ri:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orb $-17, %cl
; X86-NEXT: je .LBB0_2
define i8 @or_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: or_i8_rr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orb %al, %cl
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1:
define i8 @bar() nounwind {
; CHECK-LABEL: bar:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movb bara+19, %al
+; CHECK-NEXT: movzbl bara+19, %eax
; CHECK-NEXT: addb bara+4, %al
; CHECK-NEXT: retl
entry:
define i1 @plus_one() nounwind {
; CHECK32-LABEL: plus_one:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb M, %al
+; CHECK32-NEXT: movzbl M, %eax
; CHECK32-NEXT: incl L
; CHECK32-NEXT: jne .LBB0_2
; CHECK32-NEXT: # %bb.1: # %entry
;
; CHECK64-LABEL: plus_one:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: movb M(%rip), %al
+; CHECK64-NEXT: movzbl M(%rip), %eax
; CHECK64-NEXT: incl L(%rip)
; CHECK64-NEXT: jne .LBB0_2
; CHECK64-NEXT: # %bb.1: # %entry
define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb M, %al
+; CHECK32-NEXT: movzbl M, %eax
; CHECK32-NEXT: addl $42, L
; CHECK32-NEXT: jne .LBB1_2
; CHECK32-NEXT: # %bb.1: # %entry
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: movb M(%rip), %al
+; CHECK64-NEXT: movzbl M(%rip), %eax
; CHECK64-NEXT: addl $42, L(%rip)
; CHECK64-NEXT: jne .LBB1_2
; CHECK64-NEXT: # %bb.1: # %entry
define i1 @minus_one() nounwind {
; CHECK32-LABEL: minus_one:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb M, %al
+; CHECK32-NEXT: movzbl M, %eax
; CHECK32-NEXT: decl L
; CHECK32-NEXT: jne .LBB2_2
; CHECK32-NEXT: # %bb.1: # %entry
;
; CHECK64-LABEL: minus_one:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: movb M(%rip), %al
+; CHECK64-NEXT: movzbl M(%rip), %eax
; CHECK64-NEXT: decl L(%rip)
; CHECK64-NEXT: jne .LBB2_2
; CHECK64-NEXT: # %bb.1: # %entry
define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb M, %al
+; CHECK32-NEXT: movzbl M, %eax
; CHECK32-NEXT: addl $-42, L
; CHECK32-NEXT: jne .LBB3_2
; CHECK32-NEXT: # %bb.1: # %entry
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: movb M(%rip), %al
+; CHECK64-NEXT: movzbl M(%rip), %eax
; CHECK64-NEXT: addl $-42, L(%rip)
; CHECK64-NEXT: jne .LBB3_2
; CHECK64-NEXT: # %bb.1: # %entry
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb %al
; X86-NEXT: andb $85, %al
define zeroext i1 @f1(ptr %x) {
; CHECK-LABEL: f1:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: retq
entry:
define zeroext i1 @f2(ptr %x) {
; CHECK-LABEL: f2:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: retq
entry:
define <4 x i1> @test2(ptr %in) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrb %cl
; CHECK-NEXT: andb $1, %cl
define <4 x i64> @test3(ptr %in) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: movzbl %al, %ecx
; CHECK-NEXT: shrb %al
; CHECK-NEXT: movzbl %al, %eax
; X86-LABEL: crash:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shlb $2, %dl
; X86-NEXT: andb $3, %cl
; X86-NEXT: orb %dl, %cl
; X86-LABEL: PR22473:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
; X86-NEXT: sete %al
; X86-NEXT: retl
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
@d = global i32 0, align 4
; Verify the sar happens before ecx is clobbered with the parameter being
; passed to fn3
-; CHECK-LABEL: fn4
-; CHECK: movb d, %cl
-; CHECK: sarl %cl
-; CHECK: movl $2, %ecx
+
define i32 @fn4(i32 %i) #0 {
+; CHECK-LABEL: fn4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl d, %ecx
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: sarl %cl, %esi
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: movl $5, %edx
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: calll fn3@PLT
+; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: setle %al
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
entry:
%0 = load i32, ptr @d, align 4
%shr = ashr i32 %i, %0
;
; X64-LABEL: foo:
; X64: # %bb.0: # %bb
-; X64-NEXT: movb var_27(%rip), %cl
+; X64-NEXT: movzbl var_27(%rip), %ecx
; X64-NEXT: movzwl var_22(%rip), %eax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: addb $30, %cl
; 686-NEXT: .cfi_def_cfa_register %ebp
; 686-NEXT: andl $-8, %esp
; 686-NEXT: subl $8, %esp
-; 686-NEXT: movb var_27, %cl
+; 686-NEXT: movzbl var_27, %ecx
; 686-NEXT: movzwl var_22, %eax
; 686-NEXT: movl %eax, (%esp)
; 686-NEXT: movl $0, {{[0-9]+}}(%esp)
define void @sum_unroll(ptr nocapture readonly, ptr nocapture) {
; CHECK-LABEL: sum_unroll:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb _ZL1c(%rip), %al
+; CHECK-NEXT: movzbl _ZL1c(%rip), %eax
; CHECK-NEXT: movq (%rdi), %rcx
; CHECK-NEXT: addb $-1, %al
; CHECK-NEXT: adcq %rcx, (%rsi)
; CHECK-NEXT: cmpl %eax, var_21(%rip)
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movl %ecx, var_390(%rip)
-; CHECK-NEXT: movb var_11(%rip), %al
+; CHECK-NEXT: movzbl var_11(%rip), %eax
; CHECK-NEXT: movb %al, var_370(%rip)
; CHECK-NEXT: retq
entry:
define dso_local void @PR35765() {
; CHECK-LABEL: PR35765:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movb s1(%rip), %cl
+; CHECK-NEXT: movzbl s1(%rip), %ecx
; CHECK-NEXT: addb $-118, %cl
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: shll %cl, %eax
define void @f() {
; X64-LABEL: f:
; X64: # %bb.0: # %BB
-; X64-NEXT: movb (%rax), %al
+; X64-NEXT: movzbl (%rax), %eax
; X64-NEXT: cmpb $0, (%rax)
; X64-NEXT: setne (%rax)
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: cmpb $0, (%eax)
; X86-NEXT: setne (%eax)
; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax
define void @g() {
; X64-LABEL: g:
; X64: # %bb.0: # %BB
-; X64-NEXT: movb (%rax), %al
+; X64-NEXT: movzbl (%rax), %eax
; X64-NEXT: cmpb $0, (%rax)
; X64-NEXT: setne (%rax)
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: cmpb $0, (%eax)
; X86-NEXT: setne (%eax)
; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, (%rax)
-; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi
-; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %dil
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
; CHECK-NEXT: movb %al, (%rax)
; CHECK-NEXT: movq %rcx, 1(%rax)
; CHECK-NEXT: movw %dx, 9(%rax)
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl a
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; CHECK-NEXT: movb %cl, %dh
; CHECK-NEXT: movl $0, h
; CHECK-NEXT: cmpb $8, %dl
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $.str, (%esp)
; CHECK-NEXT: calll printf
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
; CHECK-NEXT: # implicit-def: $eax
; CHECK-NEXT: .LBB0_6: # %for.cond35
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $0, 1(%rdi)
; CHECK-NEXT: movl -4(%rdi), %eax
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl 1(%rdi), %eax
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzwl 5(%rdi), %eax
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb 7(%rdi), %al
+; CHECK-NEXT: movzbl 7(%rdi), %eax
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movl 8(%rdi), %eax
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %dl
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: notb %dl
; CHECK-NEXT: andb $1, %dl
; CHECK-NEXT: movzbl %dl, %edx
define void @atomic_maxmin_i8() {
; CHECK-LABEL: atomic_maxmin_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb sc8(%rip), %al
+; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
-; CHECK-NEXT: movb sc8(%rip), %al
+; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_3: # %atomicrmw.start2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_3
; CHECK-NEXT: # %bb.4: # %atomicrmw.end1
-; CHECK-NEXT: movb sc8(%rip), %al
+; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_5: # %atomicrmw.start8
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_5
; CHECK-NEXT: # %bb.6: # %atomicrmw.end7
-; CHECK-NEXT: movb sc8(%rip), %al
+; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_7: # %atomicrmw.start14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
define void @trunc_shl_7_i8_i64(ptr %out, ptr %in) {
; SSE2-LABEL: trunc_shl_7_i8_i64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movb (%rsi), %al
+; SSE2-NEXT: movzbl (%rsi), %eax
; SSE2-NEXT: shlb $7, %al
; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: retq
;
; AVX2-LABEL: trunc_shl_7_i8_i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: movb (%rsi), %al
+; AVX2-NEXT: movzbl (%rsi), %eax
; AVX2-NEXT: shlb $7, %al
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: retq
define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind {
; X86-LABEL: foo:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, %ax
; X86-NEXT: retl
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: shldw %cl, %dx, %ax
; X86-NEXT: retl
define i16 @un(i16 %x, i16 %y, i16 %z) nounwind {
; X86-LABEL: un:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, %ax
; X86-NEXT: retl
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: shrdw %cl, %dx, %ax
; X86-NEXT: retl
define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone {
; CHECK32-LABEL: foo:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: roll %cl, %eax
; CHECK32-NEXT: retl
define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone {
; CHECK32-LABEL: bar:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: shldl %cl, %edx, %eax
define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone {
; CHECK32-LABEL: un:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: rorl %cl, %eax
; CHECK32-NEXT: retl
define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone {
; CHECK32-LABEL: bu:
; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: shrdl %cl, %edx, %eax
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
; X86-LABEL: rotl32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
define i32 @rotr32(i32 %A, i8 %Amt) nounwind {
; X86-LABEL: rotr32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: retl
define i16 @rotl16(i16 %A, i8 %Amt) nounwind {
; X86-LABEL: rotl16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, %ax
; X86-NEXT: retl
define i16 @rotr16(i16 %A, i8 %Amt) nounwind {
; X86-LABEL: rotr16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, %ax
; X86-NEXT: retl
define i8 @rotl8(i8 %A, i8 %Amt) nounwind {
; X86-LABEL: rotl8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, %al
; X86-NEXT: retl
;
define i8 @rotr8(i8 %A, i8 %Amt) nounwind {
; X86-LABEL: rotr8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, %al
; X86-NEXT: retl
;
define i8 @rotli8(i8 %A) nounwind {
; X86-LABEL: rotli8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb $5, %al
; X86-NEXT: retl
;
define i8 @rotri8(i8 %A) nounwind {
; X86-LABEL: rotri8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb $3, %al
; X86-NEXT: retl
;
define i8 @rotl1_8(i8 %A) nounwind {
; X86-LABEL: rotl1_8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %al
; X86-NEXT: retl
;
define i8 @rotr1_8(i8 %A) nounwind {
; X86-LABEL: rotr1_8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %al
; X86-NEXT: retl
;
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %esi, %eax
define i32 @rotate_left_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_left_32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
define i32 @rotate_right_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_right_32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: retl
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
define void @rotate_left_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_left_m32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, (%eax)
; X86-NEXT: retl
define void @rotate_right_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_right_m32:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, (%eax)
; X86-NEXT: retl
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %esi
; X86-NEXT: movl 4(%eax), %ebx
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ebx
; X86-NEXT: movl 4(%eax), %esi
define i8 @rotate_left_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_left_8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, %al
; X86-NEXT: retl
;
define i8 @rotate_right_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_right_8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, %al
; X86-NEXT: retl
;
define i16 @rotate_left_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_left_16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, %ax
; X86-NEXT: retl
define i16 @rotate_right_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_right_16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, %ax
; X86-NEXT: retl
define void @rotate_left_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, (%eax)
; X86-NEXT: retl
define void @rotate_right_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, (%eax)
; X86-NEXT: retl
define void @rotate_left_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, (%eax)
; X86-NEXT: retl
define void @rotate_right_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, (%eax)
; X86-NEXT: retl
; X86-LABEL: rotate_demanded_bits:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $30, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
; X86-LABEL: rotate_demanded_bits_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $23, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addb %cl, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addb %cl, %dl
; X86-NEXT: sarb $7, %dl
define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: cmpb $7, %al
define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: addb %cl, %dl
; X86-NEXT: sarb $7, %dl
define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
; SSE-LABEL: v1i8:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
-; SSE-NEXT: movb (%rsi), %cl
+; SSE-NEXT: movzbl (%rdi), %eax
+; SSE-NEXT: movzbl (%rsi), %ecx
; SSE-NEXT: leal (%rax,%rcx), %esi
; SSE-NEXT: sarb $7, %sil
; SSE-NEXT: addb $-128, %sil
;
; AVX-LABEL: v1i8:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
-; AVX-NEXT: movb (%rsi), %cl
+; AVX-NEXT: movzbl (%rdi), %eax
+; AVX-NEXT: movzbl (%rsi), %ecx
; AVX-NEXT: leal (%rax,%rcx), %esi
; AVX-NEXT: sarb $7, %sil
; AVX-NEXT: addb $-128, %sil
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shlb $4, %dl
; X86-NEXT: sarb $4, %dl
; X86-NEXT: shlb $2, %dl
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shlb $4, %dl
; X86-NEXT: sarb $4, %dl
; X86-NEXT: shlb $2, %dl
; MCU-NEXT: # kill: def $ah killed $ah killed $ax
; MCU-NEXT: sahf
; MCU-NEXT: seta %dl
-; MCU-NEXT: movb (%ecx,%edx,4), %al
+; MCU-NEXT: movzbl (%ecx,%edx,4), %eax
; MCU-NEXT: retl
entry:
%0 = fcmp olt double %F, 4.200000e+01
; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %eax
; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: cmovll %eax, %ecx
-; ATHLON-NEXT: movb (%ecx), %al
+; ATHLON-NEXT: movzbl (%ecx), %eax
; ATHLON-NEXT: retl
;
; MCU-LABEL: test18:
; ATHLON-LABEL: trunc_select_miscompile:
; ATHLON: ## %bb.0:
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT: movb {{[0-9]+}}(%esp), %cl
+; ATHLON-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: orb $2, %cl
; ATHLON-NEXT: shll %cl, %eax
; ATHLON-NEXT: retl
;
; ATHLON-LABEL: select_uaddo_common_op0:
; ATHLON: ## %bb.0:
-; ATHLON-NEXT: movb {{[0-9]+}}(%esp), %al
+; ATHLON-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; ATHLON-NEXT: testb $1, {{[0-9]+}}(%esp)
; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %edx
define void @test_i1_uge(ptr%A2) {
; CHECK-LABEL: test_i1_uge:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb (%rdi), %al
+; CHECK-NEXT: movzbl (%rdi), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: xorb $1, %cl
; CHECK-NEXT: andb %cl, %al
define i16 @shift_and(i16 %a) {
; X86-LABEL: shift_and:
; X86: ## %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $4, %al
; X86-NEXT: shrb $2, %al
; X86-NEXT: movzbl %al, %eax
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
define signext i8 @foo(i16 signext %x) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: retl
%retval56 = trunc i16 %x to i8
ret i8 %retval56
-; CHECK-LABEL: foo:
-; CHECK: movb
-; CHECK-NEXT: retl
}
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X32-NEXT: subb %dl, %cl
; X32-NEXT: movl %esi, %edx
; X32-NEXT: shrl %cl, %edx
define i32 @t1(i32 %t, i32 %val) nounwind {
; X32-LABEL: t1:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll %cl, %eax
; X32-NEXT: retl
define i32 @t2(i32 %t, i32 %val) nounwind {
; X32-LABEL: t2:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shll %cl, %eax
; X32-NEXT: retl
define void @t3(i16 %t) nounwind {
; X32-LABEL: t3:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: sarw %cl, X
; X32-NEXT: retl
;
; X32-LABEL: t4:
; X32: # %bb.0:
; X32-NEXT: pushl %esi
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %edx
; X32-LABEL: t5:
; X32: # %bb.0:
; X32-NEXT: pushl %esi
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %edx
; X32: # %bb.0:
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %edx
; X32-NEXT: movl 4(%eax), %edi
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: shl32:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: shl32p:
; BMI2: # %bb.0:
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: shlxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shldl %cl, %eax, %edx
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl (%eax), %esi
; BMI2-NEXT: movl 4(%eax), %edx
define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: lshr32:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: lshr32p:
; BMI2: # %bb.0:
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: shrxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shrdl %cl, %edx, %eax
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: movl (%edx), %eax
; BMI2-NEXT: movl 4(%edx), %edx
define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
; BMI2-LABEL: ashr32:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: ashr32p:
; BMI2: # %bb.0:
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: sarxl %ecx, (%eax), %eax
; BMI2-NEXT: retl
;
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shrdl %cl, %edx, %eax
; BMI2-NEXT: pushl %esi
; BMI2-NEXT: .cfi_def_cfa_offset 8
; BMI2-NEXT: .cfi_offset %esi, -8
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: movl (%edx), %eax
; BMI2-NEXT: movl 4(%edx), %edx
define i32 @shl32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: shl32and:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: shl64and:
; BMI2: # %bb.0:
; BMI2-NEXT: pushl %esi
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shldl %cl, %eax, %edx
define i32 @lshr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: lshr32and:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: lshr64and:
; BMI2: # %bb.0:
; BMI2-NEXT: pushl %esi
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shrdl %cl, %edx, %eax
define i32 @ashr32and(i32 %t, i32 %val) nounwind {
; BMI2-LABEL: ashr32and:
; BMI2: # %bb.0:
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: retl
;
; BMI2-LABEL: ashr64and:
; BMI2: # %bb.0:
; BMI2-NEXT: pushl %esi
-; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; BMI2-NEXT: shrdl %cl, %edx, %eax
define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t0_shl:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t1_lshr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t2_ashr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shldl %cl, %edx, %eax
; X86-NEXT: retl
;
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shrdl %cl, %edx, %eax
; X86-NEXT: retl
;
; CHECK-NEXT: mov esi, dword ptr [esp + 8]
; CHECK-NEXT: mov edx, dword ptr [esp + 12]
; CHECK-NEXT: mov eax, dword ptr [esp + 16]
-; CHECK-NEXT: mov cl, byte ptr [eax]
+; CHECK-NEXT: movzx ecx, byte ptr [eax]
; CHECK-NEXT: mov eax, esi
; CHECK-NEXT: shl eax, cl
; CHECK-NEXT: shld edx, esi, cl
define i64 @ashr_add_shl_i8(i64 %r) nounwind {
; X32-LABEL: ashr_add_shl_i8:
; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: addb $2, %al
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %dh
; X32-NEXT: incb %dh
; X86-LABEL: test1:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %esi, %eax
; X86-LABEL: test2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
; X86-LABEL: test3:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
define i32 @test4(i32 %A, i32 %B, i8 %C) nounwind {
; X86-LABEL: test4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: shldw %cl, %dx, %ax
; X86-NEXT: retl
define i32 @test6(i32 %A, i32 %B, i8 %C) nounwind {
; X86-LABEL: test6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: shrdw %cl, %dx, %ax
; X86-NEXT: retl
; X86-LABEL: test8:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %esi, %eax
define i64 @test9(i64 %val, i32 %bits) nounwind {
; X86-LABEL: test9:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shrdl %cl, %edx, %eax
define i64 @test10(i64 %val, i32 %bits) nounwind {
; X86-LABEL: test10:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shrdl %cl, %edx, %eax
define i32 @test11(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test11:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
define i32 @test12(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test12:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
define i32 @test13(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test13:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test14:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test15:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test16:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test17:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrdl %cl, %edx, %eax
define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind {
; X86-LABEL: test18:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X86-LABEL: not_shld_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: negb %cl
; X86-LABEL: not_shrd_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: negb %cl
; i686-NEXT: pushl %esi
; i686-NEXT: subl $20, %esp
; i686-NEXT: movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT: movb {{[0-9]+}}(%esp), %al
+; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; i686-NEXT: movl {{[0-9]+}}(%esp), %edi
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl %ebp, %esi
; i686-NEXT: pushl %esi
; i686-NEXT: subl $24, %esp
; i686-NEXT: movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT: movb {{[0-9]+}}(%esp), %al
+; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl %ebp, %esi
; i686-NEXT: subl $20, %esp
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; i686-NEXT: movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT: movb {{[0-9]+}}(%esp), %al
+; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: movl %eax, %ecx
; i686-NEXT: shll %cl, %ebx
; x86_64: # %bb.0: # %entry
; x86_64-NEXT: movq %rcx, %rax
; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b
+; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; x86_64-NEXT: movl %r9d, %ecx
; x86_64-NEXT: shrdq %cl, %rax, %rdx
; x86_64-NEXT: movl %r8d, %ecx
; x86_64: # %bb.0: # %entry
; x86_64-NEXT: movq %rcx, %r11
; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b
+; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; x86_64-NEXT: movl %r9d, %ecx
; x86_64-NEXT: shrdq %cl, %r11, %rdx
; x86_64-NEXT: movl %r8d, %ecx
; i686-NEXT: .LBB8_29: # %entry
; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; i686-NEXT: movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; i686-NEXT: movl {{[0-9]+}}(%esp), %esi
; i686-NEXT: jne .LBB8_30
; i686-NEXT: # %bb.31: # %entry
; i686-NEXT: .LBB8_34: # %entry
; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; i686-NEXT: .LBB8_35: # %entry
-; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx
; i686-NEXT: shrdl %cl, %ebx, %esi
; i686-NEXT: testb $32, %cl
; x86_64: # %bb.0: # %entry
; x86_64-NEXT: movq %rcx, %rax
; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b
+; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
; x86_64-NEXT: movl %r9d, %ecx
; x86_64-NEXT: shldq %cl, %rdx, %rax
; x86_64-NEXT: movl %r8d, %ecx
define i8 @test_i8_shl_lshr_0(i8 %a0) {
; X86-LABEL: test_i8_shl_lshr_0:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $-8, %al
; X86-NEXT: retl
;
define i8 @test_i8_shl_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_shl_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $2, %al
; X86-NEXT: andb $-32, %al
; X86-NEXT: retl
define i8 @test_i8_shl_lshr_2(i8 %a0) {
; X86-LABEL: test_i8_shl_lshr_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb $2, %al
; X86-NEXT: andb $56, %al
; X86-NEXT: retl
define i8 @test_i8_lshr_lshr_0(i8 %a0) {
; X86-LABEL: test_i8_lshr_lshr_0:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $31, %al
; X86-NEXT: retl
;
define i8 @test_i8_lshr_lshr_1(i8 %a0) {
; X86-LABEL: test_i8_lshr_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shrb $2, %al
; X86-NEXT: andb $7, %al
; X86-NEXT: retl
define i8 @test_i8_lshr_lshr_2(i8 %a0) {
; X86-LABEL: test_i8_lshr_lshr_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $2, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: retl
;
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
; X86-NEXT: movsbl %cl, %ecx
;
; X86-LABEL: func6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
; X86-NEXT: mulb %cl
;
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: movsbl %cl, %ecx
; X86-NEXT: movsbl %al, %eax
;
; X86-LABEL: func6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: movb %al, %ah
define i1 @test_srem_even(i4 %X) nounwind {
; X86-LABEL: test_srem_even:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: sarb $4, %cl
define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; X86-LABEL: test_srem_pow2_setne:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: shlb $2, %cl
; X86-NEXT: sarb $5, %cl
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_srem_vec:
;
; X86-AVX512-LABEL: add_ss_mask:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovaps %xmm2, %xmm0
;
; X86-AVX512-LABEL: add_sd_mask:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: kmovw %eax, %k1
; X86-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
; X86-AVX512-NEXT: vmovapd %xmm2, %xmm0
;
; X86-AVX512-LABEL: test_mm_set1_epi8:
; X86-AVX512: # %bb.0:
-; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movswl %si, %edi
; X86-LABEL: func2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %eax, %edx
; X86-LABEL: func4:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shlb $4, %dl
; X86-NEXT: movb %dl, %ch
; X86-NEXT: shlb %cl, %ch
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %edx, %ebx
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shll $14, %edx
; X86-NEXT: movl %edx, %esi
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: shll %cl, %esi
; X86-LABEL: func8:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movb %dl, %ch
; X86-NEXT: shlb %cl, %ch
; X86-NEXT: movzbl %ch, %esi
; X86-NEXT: cmpl %eax, %edi
; X86-NEXT: cmovel %ebp, %edx
; X86-NEXT: movl %esi, %edi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sarl %cl, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovel %edi, %eax
; X86-NEXT: movl %esi, %edi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: sarl %cl, %ebp
define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb %dl, %al
; X86-NEXT: setns %cl
define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: cmpb $7, %al
define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb %al, %dl
define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
; SSE-LABEL: v1i8:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
-; SSE-NEXT: movb (%rsi), %cl
+; SSE-NEXT: movzbl (%rdi), %eax
+; SSE-NEXT: movzbl (%rsi), %ecx
; SSE-NEXT: xorl %esi, %esi
; SSE-NEXT: cmpb %cl, %al
; SSE-NEXT: setns %sil
;
; AVX-LABEL: v1i8:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
-; AVX-NEXT: movb (%rsi), %cl
+; AVX-NEXT: movzbl (%rdi), %eax
+; AVX-NEXT: movzbl (%rsi), %ecx
; AVX-NEXT: xorl %esi, %esi
; AVX-NEXT: cmpb %cl, %al
; AVX-NEXT: setns %sil
; X64-NEXT: movb %sil, (%rdi)
; X64-NEXT: retq
;
-; X86-LABEL: test1:
-; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb %al, (%ecx)
-; X86-NEXT: retl
+; X86-BWON-LABEL: test1:
+; X86-BWON: ## %bb.0: ## %entry
+; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWON-NEXT: movb %al, (%ecx)
+; X86-BWON-NEXT: retl
+;
+; X86-BWOFF-LABEL: test1:
+; X86-BWOFF: ## %bb.0: ## %entry
+; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWOFF-NEXT: movb %al, (%ecx)
+; X86-BWOFF-NEXT: retl
entry:
%A = load i32, ptr %a0, align 4
%B = and i32 %A, -256 ; 0xFFFFFF00
; X64-NEXT: movb %sil, 1(%rdi)
; X64-NEXT: retq
;
-; X86-LABEL: test2:
-; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb %al, 1(%ecx)
-; X86-NEXT: retl
+; X86-BWON-LABEL: test2:
+; X86-BWON: ## %bb.0: ## %entry
+; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWON-NEXT: movb %al, 1(%ecx)
+; X86-BWON-NEXT: retl
+;
+; X86-BWOFF-LABEL: test2:
+; X86-BWOFF: ## %bb.0: ## %entry
+; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWOFF-NEXT: movb %al, 1(%ecx)
+; X86-BWOFF-NEXT: retl
entry:
%A = load i32, ptr %a0, align 4
%B = and i32 %A, -65281 ; 0xFFFF00FF
; X64-NEXT: movb %sil, 5(%rdi)
; X64-NEXT: retq
;
-; X86-LABEL: test6:
-; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movb %al, 5(%ecx)
-; X86-NEXT: retl
+; X86-BWON-LABEL: test6:
+; X86-BWON: ## %bb.0: ## %entry
+; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWON-NEXT: movb %al, 5(%ecx)
+; X86-BWON-NEXT: retl
+;
+; X86-BWOFF-LABEL: test6:
+; X86-BWOFF: ## %bb.0: ## %entry
+; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BWOFF-NEXT: movb %al, 5(%ecx)
+; X86-BWOFF-NEXT: retl
entry:
%A = load i64, ptr %a0, align 4
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
; X64-NEXT: movb %sil, 5(%rdi)
; X64-NEXT: retq
;
-; X86-LABEL: test7:
-; X86: ## %bb.0: ## %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %eax
-; X86-NEXT: movb %cl, 5(%edx)
-; X86-NEXT: retl
+; X86-BWON-LABEL: test7:
+; X86-BWON: ## %bb.0: ## %entry
+; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BWON-NEXT: movl (%eax), %eax
+; X86-BWON-NEXT: movb %cl, 5(%edx)
+; X86-BWON-NEXT: retl
+;
+; X86-BWOFF-LABEL: test7:
+; X86-BWOFF: ## %bb.0: ## %entry
+; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BWOFF-NEXT: movl (%eax), %eax
+; X86-BWOFF-NEXT: movb %cl, 5(%edx)
+; X86-BWOFF-NEXT: retl
entry:
%OtherLoad = load i32 , ptr%P2
%A = load i64, ptr %a0, align 4
; X86-NEXT: .LBB2_2: # %compare
; X86-NEXT: movdqa %xmm0, (%esp)
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movb (%esp,%ecx), %al
+; X86-NEXT: movzbl (%esp,%ecx), %eax
; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT: subb 16(%esp,%ecx), %al
; X86-NEXT: .LBB2_3: # %exit
; X64-NEXT: .LBB2_2: # %compare
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movb -24(%rsp,%rcx), %al
+; X64-NEXT: movzbl -24(%rsp,%rcx), %eax
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: subb -40(%rsp,%rcx), %al
; X64-NEXT: movzbl %al, %eax
; X86-NEXT: .LBB5_2: # %compare
; X86-NEXT: movdqa %xmm1, (%esp)
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movb (%esp,%ecx), %al
+; X86-NEXT: movzbl (%esp,%ecx), %eax
; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: subb 16(%esp,%ecx), %al
; X86-NEXT: .LBB5_3: # %exit
; X64-NEXT: .LBB5_2: # %compare
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movb -24(%rsp,%rcx), %al
+; X64-NEXT: movzbl -24(%rsp,%rcx), %eax
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: subb -40(%rsp,%rcx), %al
; X64-NEXT: movzbl %al, %eax
; X86-NEXT: subl $48, %esp
; X86-NEXT: movdqa %xmm0, (%esp)
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movb (%esp,%ecx), %al
+; X86-NEXT: movzbl (%esp,%ecx), %eax
; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT: subb 16(%esp,%ecx), %al
; X86-NEXT: movl %ebp, %esp
; X64-NEXT: .LBB14_2: # %compare
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movb -24(%rsp,%rcx), %al
+; X64-NEXT: movzbl -24(%rsp,%rcx), %eax
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: subb -40(%rsp,%rcx), %al
; X64-NEXT: movzbl %al, %eax
; X86-NEXT: .LBB17_2: # %compare
; X86-NEXT: movdqa %xmm1, (%esp)
; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movb (%esp,%ecx), %al
+; X86-NEXT: movzbl (%esp,%ecx), %eax
; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: subb 16(%esp,%ecx), %al
; X86-NEXT: .LBB17_3: # %exit
; X64-NEXT: .LBB17_2: # %compare
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movb -24(%rsp,%rcx), %al
+; X64-NEXT: movzbl -24(%rsp,%rcx), %eax
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: subb -40(%rsp,%rcx), %al
; X64-NEXT: movzbl %al, %eax
define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: scalar_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: incb %al
; X86-NEXT: retl
; CHECK-APPLE-NEXT: testq %r12, %r12
; CHECK-APPLE-NEXT: jne LBB1_2
; CHECK-APPLE-NEXT: ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT: movb 8(%rdi), %al
+; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax
; CHECK-APPLE-NEXT: movb %al, (%rbx)
; CHECK-APPLE-NEXT: LBB1_2: ## %handler
; CHECK-APPLE-NEXT: callq _free
; CHECK-i386-NEXT: jne LBB1_2
; CHECK-i386-NEXT: ## %bb.1: ## %cont
; CHECK-i386-NEXT: movl 16(%esp), %ecx
-; CHECK-i386-NEXT: movb 8(%eax), %dl
+; CHECK-i386-NEXT: movzbl 8(%eax), %edx
; CHECK-i386-NEXT: movb %dl, (%ecx)
; CHECK-i386-NEXT: LBB1_2: ## %handler
; CHECK-i386-NEXT: movl %eax, (%esp)
; CHECK-APPLE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-APPLE-NEXT: jbe LBB2_1
; CHECK-APPLE-NEXT: ## %bb.3: ## %bb_end
-; CHECK-APPLE-NEXT: movb 8(%r12), %al
+; CHECK-APPLE-NEXT: movzbl 8(%r12), %eax
; CHECK-APPLE-NEXT: movb %al, (%rbx)
; CHECK-APPLE-NEXT: LBB2_4: ## %handler
; CHECK-APPLE-NEXT: movq %r12, %rdi
; CHECK-i386-NEXT: sahf
; CHECK-i386-NEXT: jbe LBB2_1
; CHECK-i386-NEXT: ## %bb.3: ## %bb_end
-; CHECK-i386-NEXT: movb 8(%ecx), %al
+; CHECK-i386-NEXT: movzbl 8(%ecx), %eax
; CHECK-i386-NEXT: movb %al, (%esi)
; CHECK-i386-NEXT: fldz
; CHECK-i386-NEXT: LBB2_4: ## %handler
; CHECK-APPLE-NEXT: testq %r12, %r12
; CHECK-APPLE-NEXT: jne LBB6_2
; CHECK-APPLE-NEXT: ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT: movb 8(%rdi), %al
+; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax
; CHECK-APPLE-NEXT: movb %al, (%rbx)
; CHECK-APPLE-NEXT: LBB6_2: ## %handler
; CHECK-APPLE-NEXT: callq _free
; CHECK-i386-NEXT: jne LBB6_2
; CHECK-i386-NEXT: ## %bb.1: ## %cont
; CHECK-i386-NEXT: movl 48(%esp), %ecx
-; CHECK-i386-NEXT: movb 8(%eax), %dl
+; CHECK-i386-NEXT: movzbl 8(%eax), %edx
; CHECK-i386-NEXT: movb %dl, (%ecx)
; CHECK-i386-NEXT: LBB6_2: ## %handler
; CHECK-i386-NEXT: movl %eax, (%esp)
; CHECK-APPLE-NEXT: testq %r12, %r12
; CHECK-APPLE-NEXT: jne LBB7_2
; CHECK-APPLE-NEXT: ## %bb.1: ## %cont
-; CHECK-APPLE-NEXT: movb 8(%rdi), %al
+; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax
; CHECK-APPLE-NEXT: movb %al, (%rbx)
; CHECK-APPLE-NEXT: LBB7_2: ## %handler
; CHECK-APPLE-NEXT: callq _free
; CHECK-APPLE-NEXT: testq %r12, %r12
; CHECK-APPLE-NEXT: jne LBB7_4
; CHECK-APPLE-NEXT: ## %bb.3: ## %cont2
-; CHECK-APPLE-NEXT: movb 8(%rdi), %al
+; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax
; CHECK-APPLE-NEXT: movb %al, (%r14)
; CHECK-APPLE-NEXT: LBB7_4: ## %handler2
; CHECK-APPLE-NEXT: callq _free
; CHECK-i386-NEXT: jne LBB7_2
; CHECK-i386-NEXT: ## %bb.1: ## %cont
; CHECK-i386-NEXT: movl 8(%ebp), %ecx
-; CHECK-i386-NEXT: movb 8(%eax), %dl
+; CHECK-i386-NEXT: movzbl 8(%eax), %edx
; CHECK-i386-NEXT: movb %dl, (%ecx)
; CHECK-i386-NEXT: LBB7_2: ## %handler
; CHECK-i386-NEXT: subl $12, %esp
; CHECK-i386-NEXT: jne LBB7_4
; CHECK-i386-NEXT: ## %bb.3: ## %cont2
; CHECK-i386-NEXT: movl 12(%ebp), %ecx
-; CHECK-i386-NEXT: movb 8(%eax), %dl
+; CHECK-i386-NEXT: movzbl 8(%eax), %edx
; CHECK-i386-NEXT: movb %dl, (%ecx)
; CHECK-i386-NEXT: LBB7_4: ## %handler2
; CHECK-i386-NEXT: subl $12, %esp
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: movb 0, %bl
+; CHECK-NEXT: movzbl 0, %ebx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_8
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck -check-prefix=X86_LINUX %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
; RUN: llc < %s -mtriple=i386-linux-gnu -fast-isel | FileCheck -check-prefix=X86_ISEL_LINUX %s
define dso_local i32 @f1() {
; X86_LINUX-LABEL: f1:
-; X86_LINUX: movl %gs:i1@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:i1@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f1:
-; X64_LINUX: movl %fs:i1@TPOFF, %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f1:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:i1@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f1:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movl %fs:i1@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f1:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f1:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f1:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f1:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i1
define dso_local ptr @f2() {
; X86_LINUX-LABEL: f2:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f2:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f2:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f2:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f2:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f2:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f2:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f2:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @i1
define dso_local i32 @f3() nounwind {
; X86_LINUX-LABEL: f3:
-; X86_LINUX: movl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT: movl %gs:(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: movl %gs:(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f3:
-; X64_LINUX: movq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT: movl %fs:(%rax), %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f3:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl i2@INDNTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: movl %gs:(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f3:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax
+; X64_ISEL_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f3:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f3:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f3:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f3:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i2
define dso_local ptr @f4() {
; X86_LINUX-LABEL: f4:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: addl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f4:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f4:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f4:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f4:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f4:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f4:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f4:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @i2
define dso_local i32 @f5() nounwind {
; X86_LINUX-LABEL: f5:
-; X86_LINUX: movl %gs:i3@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:i3@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f5:
-; X64_LINUX: movl %fs:i3@TPOFF, %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f5:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:i3@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f5:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movl %fs:i3@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f5:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i3@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f5:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f5:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f5:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i3
define dso_local ptr @f6() {
; X86_LINUX-LABEL: f6:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f6:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f6:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f6:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f6:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i3@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f6:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f6:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f6:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @i3
define dso_local i32 @f7() {
; X86_LINUX-LABEL: f7:
-; X86_LINUX: movl %gs:i4@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:i4@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f7:
-; X64_LINUX: movl %fs:i4@TPOFF, %eax
-; X64_LINUX-NEXT: ret
-; MINGW32-LABEL: _f7:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f7:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:i4@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f7:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movl %fs:i4@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f7:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i4@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f7:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i4@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f7:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i4
define dso_local ptr @f8() {
; X86_LINUX-LABEL: f8:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f8:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
-; MINGW32-LABEL: _f8:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f8:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f8:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f8:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i4@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f8:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i4@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f8:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @i4
define dso_local i32 @f9() {
; X86_LINUX-LABEL: f9:
-; X86_LINUX: movl %gs:i5@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:i5@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f9:
-; X64_LINUX: movl %fs:i5@TPOFF, %eax
-; X64_LINUX-NEXT: ret
-; MINGW32-LABEL: _f9:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f9:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:i5@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f9:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movl %fs:i5@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f9:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i5@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f9:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i5@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f9:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i5
define dso_local ptr @f10() {
; X86_LINUX-LABEL: f10:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f10:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
-; MINGW32-LABEL: _f10:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f10:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f10:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f10:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i5@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f10:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i5@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f10:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @i5
define i16 @f11() {
; X86_LINUX-LABEL: f11:
-; X86_LINUX: movzwl %gs:s1@NTPOFF, %eax
-; X86_LINUX: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movzwl %gs:s1@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f11:
-; X64_LINUX: movzwl %fs:s1@TPOFF, %eax
-; X64_LINUX: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f11:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movzwl %gs:s1@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f11:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movzwl %fs:s1@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f11:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
-; X86_WIN: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f11:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
-; X64_WIN: ret
-; MINGW32-LABEL: _f11:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movzwl _s1@SECREL32(%eax), %eax
-; MINGW32: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f11:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movzwl _s1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i16, ptr @s1
define dso_local i32 @f12() {
; X86_LINUX-LABEL: f12:
-; X86_LINUX: movswl %gs:s1@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movswl %gs:s1@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f12:
-; X64_LINUX: movswl %fs:s1@TPOFF, %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f12:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movswl %gs:s1@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f12:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movswl %fs:s1@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f12:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f12:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f12:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f12:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
define dso_local i8 @f13() {
; X86_LINUX-LABEL: f13:
-; X86_LINUX: movb %gs:b1@NTPOFF, %al
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movzbl %gs:b1@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f13:
-; X64_LINUX: movb %fs:b1@TPOFF, %al
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movzbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f13:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movzbl %gs:b1@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f13:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movzbl %fs:b1@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f13:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movb _b1@SECREL32(%eax), %al
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movzbl _b1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f13:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f13:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movb _b1@SECREL32(%eax), %al
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzbl b1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f13:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movzbl _b1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i8, ptr @b1
define dso_local i32 @f14() {
; X86_LINUX-LABEL: f14:
-; X86_LINUX: movsbl %gs:b1@NTPOFF, %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movsbl %gs:b1@NTPOFF, %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f14:
-; X64_LINUX: movsbl %fs:b1@TPOFF, %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f14:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movsbl %gs:b1@NTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f14:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movsbl %fs:b1@TPOFF, %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f14:
-; X86_WIN: movl __tls_index, %eax
-; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
-; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
-; X86_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f14:
-; X64_WIN: movl _tls_index(%rip), %eax
-; X64_WIN-NEXT: movq %gs:88, %rcx
-; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
-; X64_WIN-NEXT: ret
-; MINGW32-LABEL: _f14:
-; MINGW32: movl __tls_index, %eax
-; MINGW32-NEXT: movl %fs:44, %ecx
-; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
-; MINGW32-NEXT: movsbl _b1@SECREL32(%eax), %eax
-; MINGW32-NEXT: retl
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f14:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movsbl _b1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i8, ptr @b1
define dso_local ptr @f15() {
; X86_LINUX-LABEL: f15:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f15:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f15:
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f15:
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
; X86_WIN-LABEL: f15:
-; X86_WIN: movl %fs:__tls_array, %eax
-; X86_WIN-NEXT: movl (%eax), %eax
-; X86_WIN-NEXT: leal _b2@SECREL32(%eax), %eax
-; X86_WIN-NEXT: ret
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl %fs:__tls_array, %eax
+; X86_WIN-NEXT: movl (%eax), %eax
+; X86_WIN-NEXT: leal _b2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
; X64_WIN-LABEL: f15:
-; X64_WIN: movq %gs:88, %rax
-; X64_WIN-NEXT: movq (%rax), %rax
-; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax
-; X64_WIN-NEXT: ret
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movq %gs:88, %rax
+; X64_WIN-NEXT: movq (%rax), %rax
+; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
; MINGW32-LABEL: f15:
-; MINGW32: movl %fs:44, %eax
-; MINGW32-NEXT: movl (%eax), %eax
-; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax
-; MINGW32-NEXT: ret
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl %fs:44, %eax
+; MINGW32-NEXT: movl (%eax), %eax
+; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
ret ptr @b2
}
define dso_local ptr @f16() {
; X86_LINUX-LABEL: f16:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
-
+; X86_LINUX: # %bb.0:
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f16:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0:
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: retq
+;
+; X86_ISEL_LINUX-LABEL: f16:
+; X86_ISEL_LINUX: # %bb.0:
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
+; X64_ISEL_LINUX-LABEL: f16:
+; X64_ISEL_LINUX: # %bb.0:
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f16:
+; X86_WIN: # %bb.0:
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: leal _i6@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f16:
+; X64_WIN: # %bb.0:
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i6@SECREL32(%rax), %rax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f16:
+; MINGW32: # %bb.0:
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: leal _i6@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
+
ret ptr @i6
}
; NOTE: Similar to f1() but with direct TLS segment access disabled
define dso_local i32 @f17() #0 {
; X86_LINUX-LABEL: f17:
-; X86_LINUX: movl %gs:0, %eax
-; X86_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl %gs:0, %eax
+; X86_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f17:
-; X64_LINUX: movq %fs:0, %rax
-; X64_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq %fs:0, %rax
+; X64_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
+; X64_LINUX-NEXT: retq
+;
; X86_ISEL_LINUX-LABEL: f17:
-; X86_ISEL_LINUX: movl %gs:0, %eax
-; X86_ISEL_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
-; X86_ISEL_LINUX-NEXT: ret
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax
+; X86_ISEL_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
; X64_ISEL_LINUX-LABEL: f17:
-; X64_ISEL_LINUX: movq %fs:0, %rax
-; X64_ISEL_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
-; X64_ISEL_LINUX-NEXT: ret
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax
+; X64_ISEL_LINUX-NEXT: movl i1@TPOFF(%rax), %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f17:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f17:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f17:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
%tmp1 = load i32, ptr @i1
; NOTE: Similar to f3() but with direct TLS segment access disabled
define dso_local i32 @f18() #1 {
; X86_LINUX-LABEL: f18:
-; X86_LINUX: movl i2@INDNTPOFF, %eax
-; X86_LINUX-NEXT: movl %gs:0, %ecx
-; X86_LINUX-NEXT: movl (%ecx,%eax), %eax
-; X86_LINUX-NEXT: ret
+; X86_LINUX: # %bb.0: # %entry
+; X86_LINUX-NEXT: movl i2@INDNTPOFF, %eax
+; X86_LINUX-NEXT: movl %gs:0, %ecx
+; X86_LINUX-NEXT: movl (%ecx,%eax), %eax
+; X86_LINUX-NEXT: retl
+;
; X64_LINUX-LABEL: f18:
-; X64_LINUX: movq i2@GOTTPOFF(%rip), %rax
-; X64_LINUX-NEXT: movq %fs:0, %rcx
-; X64_LINUX-NEXT: movl (%rcx,%rax), %eax
-; X64_LINUX-NEXT: ret
+; X64_LINUX: # %bb.0: # %entry
+; X64_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movq %fs:0, %rcx
+; X64_LINUX-NEXT: movl (%rcx,%rax), %eax
+; X64_LINUX-NEXT: retq
+;
; X86_ISEL_LINUX-LABEL: f18:
-; X86_ISEL_LINUX: movl i2@INDNTPOFF, %eax
-; X86_ISEL_LINUX-NEXT: movl %gs:0, %ecx
-; X86_ISEL_LINUX-NEXT: movl (%ecx,%eax), %eax
-; X86_ISEL_LINUX-NEXT: ret
+; X86_ISEL_LINUX: # %bb.0: # %entry
+; X86_ISEL_LINUX-NEXT: movl i2@INDNTPOFF, %eax
+; X86_ISEL_LINUX-NEXT: movl %gs:0, %ecx
+; X86_ISEL_LINUX-NEXT: movl (%ecx,%eax), %eax
+; X86_ISEL_LINUX-NEXT: retl
+;
; X64_ISEL_LINUX-LABEL: f18:
-; X64_ISEL_LINUX: movq i2@GOTTPOFF(%rip), %rax
-; X64_ISEL_LINUX-NEXT: movq %fs:0, %rcx
-; X64_ISEL_LINUX-NEXT: movl (%rcx,%rax), %eax
-; X64_ISEL_LINUX-NEXT: ret
+; X64_ISEL_LINUX: # %bb.0: # %entry
+; X64_ISEL_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax
+; X64_ISEL_LINUX-NEXT: movq %fs:0, %rcx
+; X64_ISEL_LINUX-NEXT: movl (%rcx,%rax), %eax
+; X64_ISEL_LINUX-NEXT: retq
+;
+; X86_WIN-LABEL: f18:
+; X86_WIN: # %bb.0: # %entry
+; X86_WIN-NEXT: movl __tls_index, %eax
+; X86_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax
+; X86_WIN-NEXT: retl
+;
+; X64_WIN-LABEL: f18:
+; X64_WIN: # %bb.0: # %entry
+; X64_WIN-NEXT: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax
+; X64_WIN-NEXT: retq
+;
+; MINGW32-LABEL: f18:
+; MINGW32: # %bb.0: # %entry
+; MINGW32-NEXT: movl __tls_index, %eax
+; MINGW32-NEXT: movl %fs:44, %ecx
+; MINGW32-NEXT: movl (%ecx,%eax,4), %eax
+; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax
+; MINGW32-NEXT: retl
entry:
define zeroext i1 @test1(i32 %X) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: retl
%Y = trunc i32 %X to i1
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: movl $255, %eax
define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: cmpb $15, %al
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %ecx
define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: andb $15, %al
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
; SSE-LABEL: v1i8:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: addb (%rsi), %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movl $255, %ecx
;
; AVX-LABEL: v1i8:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
+; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: addb (%rsi), %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: movl $255, %ecx
;
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $15, %al
; X86-NEXT: shlb $2, %al
; X86-NEXT: movzbl %al, %eax
;
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $15, %al
; X86-NEXT: shlb $2, %al
; X86-NEXT: movzbl %al, %eax
;
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $15, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: mulb %cl
; X86-NEXT: shrb $2, %al
;
; X86-LABEL: func6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $15, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
; X86-NEXT: mulb %cl
; X86-NEXT: retl
;
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $15, %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl %al, %edx
; X86-NEXT: shlb $4, %cl
; X86-NEXT: movzbl %cl, %eax
;
; X86-LABEL: func6:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: mulb %cl
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: setne %bh
; X86-NEXT: andb %cl, %bh
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-LABEL: out_v4i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %r9b, %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: xorb %r9b, %sil
; CHECK-SSE1-LABEL: out_v4i8:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %r9b, %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT: xorb %r9b, %sil
; CHECK-BASELINE-LABEL: out_v4i8_undef:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %r9b, %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-LABEL: out_v4i8_undef:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %r9b, %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %bl, %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: xorb %bl, %sil
; CHECK-BASELINE-NEXT: xorb %bpl, %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: xorb %bpl, %r9b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorb %r11b, %bpl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT: xorb %r11b, %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %r10b, %r11b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT: xorb %r10b, %r11b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %dil, %bl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT: xorb %dil, %bl
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %bl, %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT: xorb %bl, %sil
; CHECK-SSE1-NEXT: xorb %bpl, %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: xorb %bpl, %r9b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorb %r11b, %bpl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT: xorb %r11b, %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %r10b, %r11b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT: xorb %r10b, %r11b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %dil, %bl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT: xorb %dil, %bl
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl %edx, %r11d
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %bl, %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: xorb %bl, %sil
; CHECK-BASELINE-NEXT: xorb %r12b, %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: xorb %r12b, %r9b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: xorb %bpl, %r12b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-BASELINE-NEXT: xorb %bpl, %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorb %r14b, %bpl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT: xorb %r14b, %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-BASELINE-NEXT: xorb %r15b, %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: xorb %r15b, %sil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-BASELINE-NEXT: xorb %r13b, %dl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT: xorb %r13b, %dl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r13b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: xorb %al, %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r15b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb %al, %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r14b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-BASELINE-NEXT: xorb %al, %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %bl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT: xorb %al, %bl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r8b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-BASELINE-NEXT: xorb %r8b, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT: xorb %r8b, %al
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r8b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-BASELINE-NEXT: xorb %r8b, %r10b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT: xorb %r8b, %r10b
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movl %edx, %r11d
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %bl, %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT: xorb %bl, %sil
; CHECK-SSE1-NEXT: xorb %r12b, %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: xorb %r12b, %r9b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: xorb %bpl, %r12b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-SSE1-NEXT: xorb %bpl, %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorb %r14b, %bpl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT: xorb %r14b, %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-SSE1-NEXT: xorb %r15b, %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT: xorb %r15b, %sil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-SSE1-NEXT: xorb %r13b, %dl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT: xorb %r13b, %dl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r13b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: xorb %al, %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r15b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb %al, %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r14b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-SSE1-NEXT: xorb %al, %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %bl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT: xorb %al, %bl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r8b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-SSE1-NEXT: xorb %r8b, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT: xorb %r8b, %al
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r8b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-SSE1-NEXT: xorb %r8b, %r10b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT: xorb %r8b, %r10b
; CHECK-BASELINE-NEXT: movq %rdx, %r8
; CHECK-BASELINE-NEXT: movq %rsi, %r9
; CHECK-BASELINE-NEXT: movq %rdi, %r11
-; CHECK-BASELINE-NEXT: movb 15(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 14(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 13(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 12(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 11(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 10(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 9(%rdx), %bpl
-; CHECK-BASELINE-NEXT: movb 8(%rdx), %r14b
-; CHECK-BASELINE-NEXT: movb 7(%rdx), %r15b
-; CHECK-BASELINE-NEXT: movb 6(%rdx), %r12b
-; CHECK-BASELINE-NEXT: movb 5(%rdx), %sil
-; CHECK-BASELINE-NEXT: movb 4(%rdx), %r13b
-; CHECK-BASELINE-NEXT: movb 3(%rdx), %dl
-; CHECK-BASELINE-NEXT: movb 2(%r8), %dil
-; CHECK-BASELINE-NEXT: movb (%r8), %al
-; CHECK-BASELINE-NEXT: movb 1(%r8), %cl
-; CHECK-BASELINE-NEXT: movb (%r9), %bl
+; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %ebp
+; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r14d
+; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r15d
+; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r12d
+; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %esi
+; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %r13d
+; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edx
+; CHECK-BASELINE-NEXT: movzbl 2(%r8), %edi
+; CHECK-BASELINE-NEXT: movzbl (%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 1(%r8), %ecx
+; CHECK-BASELINE-NEXT: movzbl (%r9), %ebx
; CHECK-BASELINE-NEXT: xorb %al, %bl
; CHECK-BASELINE-NEXT: andb (%r10), %bl
; CHECK-BASELINE-NEXT: xorb %al, %bl
; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 1(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 1(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 1(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 2(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 2(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: andb 2(%r10), %al
; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 3(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 3(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %dl, %al
; CHECK-BASELINE-NEXT: andb 3(%r10), %al
; CHECK-BASELINE-NEXT: xorb %dl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 4(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 4(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %r13b, %al
; CHECK-BASELINE-NEXT: andb 4(%r10), %al
; CHECK-BASELINE-NEXT: xorb %r13b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 5(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 5(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: andb 5(%r10), %al
; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 6(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 6(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %r12b, %al
; CHECK-BASELINE-NEXT: andb 6(%r10), %al
; CHECK-BASELINE-NEXT: xorb %r12b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 7(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 7(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %r15b, %al
; CHECK-BASELINE-NEXT: andb 7(%r10), %al
; CHECK-BASELINE-NEXT: xorb %r15b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 8(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 8(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %r14b, %al
; CHECK-BASELINE-NEXT: andb 8(%r10), %al
; CHECK-BASELINE-NEXT: xorb %r14b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 9(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 9(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %bpl, %al
; CHECK-BASELINE-NEXT: andb 9(%r10), %al
; CHECK-BASELINE-NEXT: xorb %bpl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 10(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 10(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 10(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 11(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 11(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 11(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 12(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 12(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 12(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 13(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 13(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 13(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 14(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 14(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 14(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 15(%r9), %al
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 15(%r9), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb 15(%r10), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 16(%r8), %al
-; CHECK-BASELINE-NEXT: movb 16(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 16(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 16(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 16(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 17(%r8), %al
-; CHECK-BASELINE-NEXT: movb 17(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 17(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 17(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 17(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 18(%r8), %al
-; CHECK-BASELINE-NEXT: movb 18(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 18(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 18(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 18(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 19(%r8), %al
-; CHECK-BASELINE-NEXT: movb 19(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 19(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 19(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 19(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 20(%r8), %al
-; CHECK-BASELINE-NEXT: movb 20(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 20(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 20(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 20(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 21(%r8), %al
-; CHECK-BASELINE-NEXT: movb 21(%r9), %r13b
+; CHECK-BASELINE-NEXT: movzbl 21(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 21(%r9), %r13d
; CHECK-BASELINE-NEXT: xorb %al, %r13b
; CHECK-BASELINE-NEXT: andb 21(%r10), %r13b
; CHECK-BASELINE-NEXT: xorb %al, %r13b
-; CHECK-BASELINE-NEXT: movb 22(%r8), %al
-; CHECK-BASELINE-NEXT: movb 22(%r9), %r12b
+; CHECK-BASELINE-NEXT: movzbl 22(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 22(%r9), %r12d
; CHECK-BASELINE-NEXT: xorb %al, %r12b
; CHECK-BASELINE-NEXT: andb 22(%r10), %r12b
; CHECK-BASELINE-NEXT: xorb %al, %r12b
-; CHECK-BASELINE-NEXT: movb 23(%r8), %al
-; CHECK-BASELINE-NEXT: movb 23(%r9), %r15b
+; CHECK-BASELINE-NEXT: movzbl 23(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 23(%r9), %r15d
; CHECK-BASELINE-NEXT: xorb %al, %r15b
; CHECK-BASELINE-NEXT: andb 23(%r10), %r15b
; CHECK-BASELINE-NEXT: xorb %al, %r15b
-; CHECK-BASELINE-NEXT: movb 24(%r8), %al
-; CHECK-BASELINE-NEXT: movb 24(%r9), %r14b
+; CHECK-BASELINE-NEXT: movzbl 24(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 24(%r9), %r14d
; CHECK-BASELINE-NEXT: xorb %al, %r14b
; CHECK-BASELINE-NEXT: andb 24(%r10), %r14b
; CHECK-BASELINE-NEXT: xorb %al, %r14b
-; CHECK-BASELINE-NEXT: movb 25(%r8), %al
-; CHECK-BASELINE-NEXT: movb 25(%r9), %bpl
+; CHECK-BASELINE-NEXT: movzbl 25(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 25(%r9), %ebp
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: andb 25(%r10), %bpl
; CHECK-BASELINE-NEXT: xorb %al, %bpl
-; CHECK-BASELINE-NEXT: movb 26(%r8), %al
-; CHECK-BASELINE-NEXT: movb 26(%r9), %dil
+; CHECK-BASELINE-NEXT: movzbl 26(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 26(%r9), %edi
; CHECK-BASELINE-NEXT: xorb %al, %dil
; CHECK-BASELINE-NEXT: andb 26(%r10), %dil
; CHECK-BASELINE-NEXT: xorb %al, %dil
-; CHECK-BASELINE-NEXT: movb 27(%r8), %al
-; CHECK-BASELINE-NEXT: movb 27(%r9), %sil
+; CHECK-BASELINE-NEXT: movzbl 27(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 27(%r9), %esi
; CHECK-BASELINE-NEXT: xorb %al, %sil
; CHECK-BASELINE-NEXT: andb 27(%r10), %sil
; CHECK-BASELINE-NEXT: xorb %al, %sil
-; CHECK-BASELINE-NEXT: movb 28(%r8), %al
-; CHECK-BASELINE-NEXT: movb 28(%r9), %dl
+; CHECK-BASELINE-NEXT: movzbl 28(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 28(%r9), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 28(%r10), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
-; CHECK-BASELINE-NEXT: movb 29(%r8), %al
-; CHECK-BASELINE-NEXT: movb 29(%r9), %cl
+; CHECK-BASELINE-NEXT: movzbl 29(%r8), %eax
+; CHECK-BASELINE-NEXT: movzbl 29(%r9), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 29(%r10), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
-; CHECK-BASELINE-NEXT: movb 30(%r8), %bl
-; CHECK-BASELINE-NEXT: movb 30(%r9), %al
+; CHECK-BASELINE-NEXT: movzbl 30(%r8), %ebx
+; CHECK-BASELINE-NEXT: movzbl 30(%r9), %eax
; CHECK-BASELINE-NEXT: xorb %bl, %al
; CHECK-BASELINE-NEXT: andb 30(%r10), %al
; CHECK-BASELINE-NEXT: xorb %bl, %al
-; CHECK-BASELINE-NEXT: movb 31(%r8), %r8b
-; CHECK-BASELINE-NEXT: movb 31(%r9), %bl
+; CHECK-BASELINE-NEXT: movzbl 31(%r8), %r8d
+; CHECK-BASELINE-NEXT: movzbl 31(%r9), %ebx
; CHECK-BASELINE-NEXT: xorb %r8b, %bl
; CHECK-BASELINE-NEXT: andb 31(%r10), %bl
; CHECK-BASELINE-NEXT: xorb %r8b, %bl
; CHECK-BASELINE-NEXT: movb %r15b, 23(%r11)
; CHECK-BASELINE-NEXT: movb %r12b, 22(%r11)
; CHECK-BASELINE-NEXT: movb %r13b, 21(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 20(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 19(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 18(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 17(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 16(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 15(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 14(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 13(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 12(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 11(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 10(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 9(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 8(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 7(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 6(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 5(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 4(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 3(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 2(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 1(%r11)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, (%r11)
; CHECK-BASELINE-NEXT: movq %r11, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-SSE1-NEXT: movq %rdx, %r8
; CHECK-SSE1-NEXT: movq %rsi, %r9
; CHECK-SSE1-NEXT: movq %rdi, %r11
-; CHECK-SSE1-NEXT: movb 15(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 15(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 14(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 13(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 12(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 11(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 10(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 9(%rdx), %bpl
-; CHECK-SSE1-NEXT: movb 8(%rdx), %r14b
-; CHECK-SSE1-NEXT: movb 7(%rdx), %r15b
-; CHECK-SSE1-NEXT: movb 6(%rdx), %r12b
-; CHECK-SSE1-NEXT: movb 5(%rdx), %sil
-; CHECK-SSE1-NEXT: movb 4(%rdx), %r13b
-; CHECK-SSE1-NEXT: movb 3(%rdx), %dl
-; CHECK-SSE1-NEXT: movb 2(%r8), %dil
-; CHECK-SSE1-NEXT: movb (%r8), %al
-; CHECK-SSE1-NEXT: movb 1(%r8), %cl
-; CHECK-SSE1-NEXT: movb (%r9), %bl
+; CHECK-SSE1-NEXT: movzbl 9(%rdx), %ebp
+; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r14d
+; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r15d
+; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r12d
+; CHECK-SSE1-NEXT: movzbl 5(%rdx), %esi
+; CHECK-SSE1-NEXT: movzbl 4(%rdx), %r13d
+; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edx
+; CHECK-SSE1-NEXT: movzbl 2(%r8), %edi
+; CHECK-SSE1-NEXT: movzbl (%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 1(%r8), %ecx
+; CHECK-SSE1-NEXT: movzbl (%r9), %ebx
; CHECK-SSE1-NEXT: xorb %al, %bl
; CHECK-SSE1-NEXT: andb (%r10), %bl
; CHECK-SSE1-NEXT: xorb %al, %bl
; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 1(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 1(%r9), %eax
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 1(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 2(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 2(%r9), %eax
; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: andb 2(%r10), %al
; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 3(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 3(%r9), %eax
; CHECK-SSE1-NEXT: xorb %dl, %al
; CHECK-SSE1-NEXT: andb 3(%r10), %al
; CHECK-SSE1-NEXT: xorb %dl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 4(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 4(%r9), %eax
; CHECK-SSE1-NEXT: xorb %r13b, %al
; CHECK-SSE1-NEXT: andb 4(%r10), %al
; CHECK-SSE1-NEXT: xorb %r13b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 5(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 5(%r9), %eax
; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: andb 5(%r10), %al
; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 6(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 6(%r9), %eax
; CHECK-SSE1-NEXT: xorb %r12b, %al
; CHECK-SSE1-NEXT: andb 6(%r10), %al
; CHECK-SSE1-NEXT: xorb %r12b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 7(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 7(%r9), %eax
; CHECK-SSE1-NEXT: xorb %r15b, %al
; CHECK-SSE1-NEXT: andb 7(%r10), %al
; CHECK-SSE1-NEXT: xorb %r15b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 8(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 8(%r9), %eax
; CHECK-SSE1-NEXT: xorb %r14b, %al
; CHECK-SSE1-NEXT: andb 8(%r10), %al
; CHECK-SSE1-NEXT: xorb %r14b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 9(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 9(%r9), %eax
; CHECK-SSE1-NEXT: xorb %bpl, %al
; CHECK-SSE1-NEXT: andb 9(%r10), %al
; CHECK-SSE1-NEXT: xorb %bpl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 10(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 10(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 10(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 11(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 11(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 11(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 12(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 12(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 12(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 13(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 13(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 13(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 14(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 14(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 14(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 15(%r9), %al
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 15(%r9), %eax
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb 15(%r10), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 16(%r8), %al
-; CHECK-SSE1-NEXT: movb 16(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 16(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 16(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 16(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 17(%r8), %al
-; CHECK-SSE1-NEXT: movb 17(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 17(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 17(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 17(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 18(%r8), %al
-; CHECK-SSE1-NEXT: movb 18(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 18(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 18(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 18(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 19(%r8), %al
-; CHECK-SSE1-NEXT: movb 19(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 19(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 19(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 19(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 20(%r8), %al
-; CHECK-SSE1-NEXT: movb 20(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 20(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 20(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 20(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 21(%r8), %al
-; CHECK-SSE1-NEXT: movb 21(%r9), %r13b
+; CHECK-SSE1-NEXT: movzbl 21(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 21(%r9), %r13d
; CHECK-SSE1-NEXT: xorb %al, %r13b
; CHECK-SSE1-NEXT: andb 21(%r10), %r13b
; CHECK-SSE1-NEXT: xorb %al, %r13b
-; CHECK-SSE1-NEXT: movb 22(%r8), %al
-; CHECK-SSE1-NEXT: movb 22(%r9), %r12b
+; CHECK-SSE1-NEXT: movzbl 22(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 22(%r9), %r12d
; CHECK-SSE1-NEXT: xorb %al, %r12b
; CHECK-SSE1-NEXT: andb 22(%r10), %r12b
; CHECK-SSE1-NEXT: xorb %al, %r12b
-; CHECK-SSE1-NEXT: movb 23(%r8), %al
-; CHECK-SSE1-NEXT: movb 23(%r9), %r15b
+; CHECK-SSE1-NEXT: movzbl 23(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 23(%r9), %r15d
; CHECK-SSE1-NEXT: xorb %al, %r15b
; CHECK-SSE1-NEXT: andb 23(%r10), %r15b
; CHECK-SSE1-NEXT: xorb %al, %r15b
-; CHECK-SSE1-NEXT: movb 24(%r8), %al
-; CHECK-SSE1-NEXT: movb 24(%r9), %r14b
+; CHECK-SSE1-NEXT: movzbl 24(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 24(%r9), %r14d
; CHECK-SSE1-NEXT: xorb %al, %r14b
; CHECK-SSE1-NEXT: andb 24(%r10), %r14b
; CHECK-SSE1-NEXT: xorb %al, %r14b
-; CHECK-SSE1-NEXT: movb 25(%r8), %al
-; CHECK-SSE1-NEXT: movb 25(%r9), %bpl
+; CHECK-SSE1-NEXT: movzbl 25(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 25(%r9), %ebp
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: andb 25(%r10), %bpl
; CHECK-SSE1-NEXT: xorb %al, %bpl
-; CHECK-SSE1-NEXT: movb 26(%r8), %al
-; CHECK-SSE1-NEXT: movb 26(%r9), %dil
+; CHECK-SSE1-NEXT: movzbl 26(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 26(%r9), %edi
; CHECK-SSE1-NEXT: xorb %al, %dil
; CHECK-SSE1-NEXT: andb 26(%r10), %dil
; CHECK-SSE1-NEXT: xorb %al, %dil
-; CHECK-SSE1-NEXT: movb 27(%r8), %al
-; CHECK-SSE1-NEXT: movb 27(%r9), %sil
+; CHECK-SSE1-NEXT: movzbl 27(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 27(%r9), %esi
; CHECK-SSE1-NEXT: xorb %al, %sil
; CHECK-SSE1-NEXT: andb 27(%r10), %sil
; CHECK-SSE1-NEXT: xorb %al, %sil
-; CHECK-SSE1-NEXT: movb 28(%r8), %al
-; CHECK-SSE1-NEXT: movb 28(%r9), %dl
+; CHECK-SSE1-NEXT: movzbl 28(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 28(%r9), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 28(%r10), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
-; CHECK-SSE1-NEXT: movb 29(%r8), %al
-; CHECK-SSE1-NEXT: movb 29(%r9), %cl
+; CHECK-SSE1-NEXT: movzbl 29(%r8), %eax
+; CHECK-SSE1-NEXT: movzbl 29(%r9), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 29(%r10), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
-; CHECK-SSE1-NEXT: movb 30(%r8), %bl
-; CHECK-SSE1-NEXT: movb 30(%r9), %al
+; CHECK-SSE1-NEXT: movzbl 30(%r8), %ebx
+; CHECK-SSE1-NEXT: movzbl 30(%r9), %eax
; CHECK-SSE1-NEXT: xorb %bl, %al
; CHECK-SSE1-NEXT: andb 30(%r10), %al
; CHECK-SSE1-NEXT: xorb %bl, %al
-; CHECK-SSE1-NEXT: movb 31(%r8), %r8b
-; CHECK-SSE1-NEXT: movb 31(%r9), %bl
+; CHECK-SSE1-NEXT: movzbl 31(%r8), %r8d
+; CHECK-SSE1-NEXT: movzbl 31(%r9), %ebx
; CHECK-SSE1-NEXT: xorb %r8b, %bl
; CHECK-SSE1-NEXT: andb 31(%r10), %bl
; CHECK-SSE1-NEXT: xorb %r8b, %bl
; CHECK-SSE1-NEXT: movb %r15b, 23(%r11)
; CHECK-SSE1-NEXT: movb %r12b, 22(%r11)
; CHECK-SSE1-NEXT: movb %r13b, 21(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 20(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 19(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 18(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 17(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 16(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 15(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 14(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 13(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 12(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 11(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 10(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 9(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 8(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 7(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 6(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 5(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 4(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 3(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 2(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 1(%r11)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, (%r11)
; CHECK-SSE1-NEXT: movq %r11, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-BASELINE-LABEL: in_v4i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
; CHECK-BASELINE-NEXT: xorb %r11b, %dl
; CHECK-BASELINE-NEXT: xorb %r10b, %cl
; CHECK-SSE1-LABEL: in_v4i8:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorl %r9d, %esi
; CHECK-SSE1-NEXT: xorb %r11b, %dl
; CHECK-SSE1-NEXT: xorb %r10b, %cl
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %r11b, %sil
; CHECK-BASELINE-NEXT: xorb %r12b, %dl
; CHECK-BASELINE-NEXT: xorb %r15b, %cl
; CHECK-BASELINE-NEXT: xorb %r14b, %r8b
; CHECK-BASELINE-NEXT: xorb %bpl, %r9b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %r11b, %sil
; CHECK-SSE1-NEXT: xorb %r12b, %dl
; CHECK-SSE1-NEXT: xorb %r15b, %cl
; CHECK-SSE1-NEXT: xorb %r14b, %r8b
; CHECK-SSE1-NEXT: xorb %bpl, %r9b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movq %rdi, %rdx
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %bl, %r11b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT: xorb %bl, %r11b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT: xorb %al, %bpl
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx)
; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx)
; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx)
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx)
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx)
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx)
-; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movq %rdi, %rdx
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %dil, %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: xorb %dil, %r9b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %r10b, %dil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-SSE1-NEXT: xorb %r10b, %dil
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %bl, %r11b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT: xorb %bl, %r11b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT: xorb %r13b, %bl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT: xorb %al, %bpl
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx)
; CHECK-SSE1-NEXT: movb %dil, 5(%rdx)
; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx)
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx)
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 2(%rdx)
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 1(%rdx)
-; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al
+; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: movq %rdx, %r13
; CHECK-BASELINE-NEXT: movq %rsi, %rbx
; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b
-; CHECK-BASELINE-NEXT: movb 14(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %r12d
+; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 13(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 12(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 11(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 10(%rdx), %al
+; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b
-; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b
-; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b
-; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b
-; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl
-; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil
-; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil
-; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b
-; CHECK-BASELINE-NEXT: movb (%rdx), %al
-; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b
-; CHECK-BASELINE-NEXT: movb (%rbx), %dl
+; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %r9d
+; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r10d
+; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r11d
+; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r8d
+; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %ebp
+; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %esi
+; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edi
+; CHECK-BASELINE-NEXT: movzbl 2(%rdx), %r14d
+; CHECK-BASELINE-NEXT: movzbl (%rdx), %eax
+; CHECK-BASELINE-NEXT: movzbl 1(%rdx), %r15d
+; CHECK-BASELINE-NEXT: movzbl (%rbx), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb (%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 1(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 1(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r15b, %al
; CHECK-BASELINE-NEXT: andb 1(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r15b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 2(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 2(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r14b, %al
; CHECK-BASELINE-NEXT: andb 2(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r14b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 3(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 3(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: andb 3(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %dil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 4(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 4(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: andb 4(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %sil, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 5(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 5(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %bpl, %al
; CHECK-BASELINE-NEXT: andb 5(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %bpl, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 6(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 6(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r8b, %al
; CHECK-BASELINE-NEXT: andb 6(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r8b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 7(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 7(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r11b, %al
; CHECK-BASELINE-NEXT: andb 7(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r11b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 8(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 8(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: andb 8(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 9(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 9(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r9b, %al
; CHECK-BASELINE-NEXT: andb 9(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r9b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 10(%rbx), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 10(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 11(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 11(%rbx), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 11(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 12(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 12(%rbx), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 12(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 13(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 13(%rbx), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 13(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 14(%rbx), %dl
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl 14(%rbx), %edx
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 15(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 15(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb %r12b, %al
; CHECK-BASELINE-NEXT: andb 15(%rcx), %al
; CHECK-BASELINE-NEXT: xorb %r12b, %al
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 16(%r13), %al
-; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl
+; CHECK-BASELINE-NEXT: movzbl 16(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 16(%rbx), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 17(%r13), %al
-; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl
+; CHECK-BASELINE-NEXT: movzbl 17(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 17(%rbx), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 17(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 18(%r13), %al
-; CHECK-BASELINE-NEXT: movb 18(%rbx), %dl
+; CHECK-BASELINE-NEXT: movzbl 18(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 18(%rbx), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 18(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 19(%r13), %al
-; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b
+; CHECK-BASELINE-NEXT: movzbl 19(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 19(%rbx), %r12d
; CHECK-BASELINE-NEXT: xorb %al, %r12b
; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b
; CHECK-BASELINE-NEXT: xorb %al, %r12b
-; CHECK-BASELINE-NEXT: movb 20(%r13), %al
-; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b
+; CHECK-BASELINE-NEXT: movzbl 20(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 20(%rbx), %r15d
; CHECK-BASELINE-NEXT: xorb %al, %r15b
; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b
; CHECK-BASELINE-NEXT: movq %rcx, %rsi
; CHECK-BASELINE-NEXT: xorb %al, %r15b
-; CHECK-BASELINE-NEXT: movb 21(%r13), %al
-; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b
+; CHECK-BASELINE-NEXT: movzbl 21(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 21(%rbx), %r14d
; CHECK-BASELINE-NEXT: xorb %al, %r14b
; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b
; CHECK-BASELINE-NEXT: xorb %al, %r14b
-; CHECK-BASELINE-NEXT: movb 22(%r13), %al
-; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl
+; CHECK-BASELINE-NEXT: movzbl 22(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 22(%rbx), %ebp
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: andb 22(%rcx), %bpl
; CHECK-BASELINE-NEXT: xorb %al, %bpl
-; CHECK-BASELINE-NEXT: movb 23(%r13), %al
-; CHECK-BASELINE-NEXT: movb 23(%rbx), %r11b
+; CHECK-BASELINE-NEXT: movzbl 23(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 23(%rbx), %r11d
; CHECK-BASELINE-NEXT: xorb %al, %r11b
; CHECK-BASELINE-NEXT: andb 23(%rcx), %r11b
; CHECK-BASELINE-NEXT: xorb %al, %r11b
-; CHECK-BASELINE-NEXT: movb 24(%r13), %al
-; CHECK-BASELINE-NEXT: movb 24(%rbx), %r10b
+; CHECK-BASELINE-NEXT: movzbl 24(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 24(%rbx), %r10d
; CHECK-BASELINE-NEXT: xorb %al, %r10b
; CHECK-BASELINE-NEXT: andb 24(%rcx), %r10b
; CHECK-BASELINE-NEXT: xorb %al, %r10b
-; CHECK-BASELINE-NEXT: movb 25(%r13), %al
-; CHECK-BASELINE-NEXT: movb 25(%rbx), %r9b
+; CHECK-BASELINE-NEXT: movzbl 25(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 25(%rbx), %r9d
; CHECK-BASELINE-NEXT: xorb %al, %r9b
; CHECK-BASELINE-NEXT: andb 25(%rcx), %r9b
; CHECK-BASELINE-NEXT: xorb %al, %r9b
-; CHECK-BASELINE-NEXT: movb 26(%r13), %al
-; CHECK-BASELINE-NEXT: movb 26(%rbx), %r8b
+; CHECK-BASELINE-NEXT: movzbl 26(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 26(%rbx), %r8d
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: andb 26(%rcx), %r8b
; CHECK-BASELINE-NEXT: xorb %al, %r8b
-; CHECK-BASELINE-NEXT: movb 27(%r13), %al
-; CHECK-BASELINE-NEXT: movb 27(%rbx), %dil
+; CHECK-BASELINE-NEXT: movzbl 27(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 27(%rbx), %edi
; CHECK-BASELINE-NEXT: xorb %al, %dil
; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil
; CHECK-BASELINE-NEXT: xorb %al, %dil
-; CHECK-BASELINE-NEXT: movb 28(%r13), %al
-; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl
+; CHECK-BASELINE-NEXT: movzbl 28(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 28(%rbx), %edx
; CHECK-BASELINE-NEXT: xorb %al, %dl
; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl
; CHECK-BASELINE-NEXT: xorb %al, %dl
-; CHECK-BASELINE-NEXT: movb 29(%r13), %al
-; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl
+; CHECK-BASELINE-NEXT: movzbl 29(%r13), %eax
+; CHECK-BASELINE-NEXT: movzbl 29(%rbx), %ecx
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
-; CHECK-BASELINE-NEXT: movb 30(%r13), %al
+; CHECK-BASELINE-NEXT: movzbl 30(%r13), %eax
; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-BASELINE-NEXT: movb 30(%rbx), %al
+; CHECK-BASELINE-NEXT: movzbl 30(%rbx), %eax
; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: andb 30(%rsi), %al
; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b
-; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl
+; CHECK-BASELINE-NEXT: movzbl 31(%r13), %r13d
+; CHECK-BASELINE-NEXT: movzbl 31(%rbx), %ebx
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13)
; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13)
; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 18(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 17(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 16(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 15(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 14(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 13(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 12(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 11(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 10(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 9(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 8(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 7(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 6(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 5(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 4(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 3(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 2(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, 1(%r13)
-; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-BASELINE-NEXT: movb %al, (%r13)
; CHECK-BASELINE-NEXT: movq %r13, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-SSE1-NEXT: movq %rdx, %r13
; CHECK-SSE1-NEXT: movq %rsi, %rbx
; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b
-; CHECK-SSE1-NEXT: movb 14(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 15(%rdx), %r12d
+; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 13(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 12(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 11(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 10(%rdx), %al
+; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b
-; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b
-; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b
-; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b
-; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl
-; CHECK-SSE1-NEXT: movb 4(%rdx), %sil
-; CHECK-SSE1-NEXT: movb 3(%rdx), %dil
-; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b
-; CHECK-SSE1-NEXT: movb (%rdx), %al
-; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b
-; CHECK-SSE1-NEXT: movb (%rbx), %dl
+; CHECK-SSE1-NEXT: movzbl 9(%rdx), %r9d
+; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r10d
+; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r11d
+; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r8d
+; CHECK-SSE1-NEXT: movzbl 5(%rdx), %ebp
+; CHECK-SSE1-NEXT: movzbl 4(%rdx), %esi
+; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edi
+; CHECK-SSE1-NEXT: movzbl 2(%rdx), %r14d
+; CHECK-SSE1-NEXT: movzbl (%rdx), %eax
+; CHECK-SSE1-NEXT: movzbl 1(%rdx), %r15d
+; CHECK-SSE1-NEXT: movzbl (%rbx), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb (%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 1(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 1(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r15b, %al
; CHECK-SSE1-NEXT: andb 1(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r15b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 2(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 2(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r14b, %al
; CHECK-SSE1-NEXT: andb 2(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r14b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 3(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 3(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: andb 3(%rcx), %al
; CHECK-SSE1-NEXT: xorb %dil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 4(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 4(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: andb 4(%rcx), %al
; CHECK-SSE1-NEXT: xorb %sil, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 5(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 5(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %bpl, %al
; CHECK-SSE1-NEXT: andb 5(%rcx), %al
; CHECK-SSE1-NEXT: xorb %bpl, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 6(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 6(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r8b, %al
; CHECK-SSE1-NEXT: andb 6(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r8b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 7(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 7(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r11b, %al
; CHECK-SSE1-NEXT: andb 7(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r11b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 8(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 8(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: andb 8(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 9(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 9(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r9b, %al
; CHECK-SSE1-NEXT: andb 9(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r9b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 10(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 10(%rbx), %edx
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 10(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 11(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 11(%rbx), %edx
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 11(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 12(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 12(%rbx), %edx
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 12(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 13(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 13(%rbx), %edx
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 13(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 14(%rbx), %dl
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl 14(%rbx), %edx
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 14(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 15(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 15(%rbx), %eax
; CHECK-SSE1-NEXT: xorb %r12b, %al
; CHECK-SSE1-NEXT: andb 15(%rcx), %al
; CHECK-SSE1-NEXT: xorb %r12b, %al
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 16(%r13), %al
-; CHECK-SSE1-NEXT: movb 16(%rbx), %dl
+; CHECK-SSE1-NEXT: movzbl 16(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 16(%rbx), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 16(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 17(%r13), %al
-; CHECK-SSE1-NEXT: movb 17(%rbx), %dl
+; CHECK-SSE1-NEXT: movzbl 17(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 17(%rbx), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 17(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 18(%r13), %al
-; CHECK-SSE1-NEXT: movb 18(%rbx), %dl
+; CHECK-SSE1-NEXT: movzbl 18(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 18(%rbx), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 18(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 19(%r13), %al
-; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b
+; CHECK-SSE1-NEXT: movzbl 19(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 19(%rbx), %r12d
; CHECK-SSE1-NEXT: xorb %al, %r12b
; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b
; CHECK-SSE1-NEXT: xorb %al, %r12b
-; CHECK-SSE1-NEXT: movb 20(%r13), %al
-; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b
+; CHECK-SSE1-NEXT: movzbl 20(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 20(%rbx), %r15d
; CHECK-SSE1-NEXT: xorb %al, %r15b
; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b
; CHECK-SSE1-NEXT: movq %rcx, %rsi
; CHECK-SSE1-NEXT: xorb %al, %r15b
-; CHECK-SSE1-NEXT: movb 21(%r13), %al
-; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b
+; CHECK-SSE1-NEXT: movzbl 21(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 21(%rbx), %r14d
; CHECK-SSE1-NEXT: xorb %al, %r14b
; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b
; CHECK-SSE1-NEXT: xorb %al, %r14b
-; CHECK-SSE1-NEXT: movb 22(%r13), %al
-; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl
+; CHECK-SSE1-NEXT: movzbl 22(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 22(%rbx), %ebp
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: andb 22(%rcx), %bpl
; CHECK-SSE1-NEXT: xorb %al, %bpl
-; CHECK-SSE1-NEXT: movb 23(%r13), %al
-; CHECK-SSE1-NEXT: movb 23(%rbx), %r11b
+; CHECK-SSE1-NEXT: movzbl 23(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 23(%rbx), %r11d
; CHECK-SSE1-NEXT: xorb %al, %r11b
; CHECK-SSE1-NEXT: andb 23(%rcx), %r11b
; CHECK-SSE1-NEXT: xorb %al, %r11b
-; CHECK-SSE1-NEXT: movb 24(%r13), %al
-; CHECK-SSE1-NEXT: movb 24(%rbx), %r10b
+; CHECK-SSE1-NEXT: movzbl 24(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 24(%rbx), %r10d
; CHECK-SSE1-NEXT: xorb %al, %r10b
; CHECK-SSE1-NEXT: andb 24(%rcx), %r10b
; CHECK-SSE1-NEXT: xorb %al, %r10b
-; CHECK-SSE1-NEXT: movb 25(%r13), %al
-; CHECK-SSE1-NEXT: movb 25(%rbx), %r9b
+; CHECK-SSE1-NEXT: movzbl 25(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 25(%rbx), %r9d
; CHECK-SSE1-NEXT: xorb %al, %r9b
; CHECK-SSE1-NEXT: andb 25(%rcx), %r9b
; CHECK-SSE1-NEXT: xorb %al, %r9b
-; CHECK-SSE1-NEXT: movb 26(%r13), %al
-; CHECK-SSE1-NEXT: movb 26(%rbx), %r8b
+; CHECK-SSE1-NEXT: movzbl 26(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 26(%rbx), %r8d
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: andb 26(%rcx), %r8b
; CHECK-SSE1-NEXT: xorb %al, %r8b
-; CHECK-SSE1-NEXT: movb 27(%r13), %al
-; CHECK-SSE1-NEXT: movb 27(%rbx), %dil
+; CHECK-SSE1-NEXT: movzbl 27(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 27(%rbx), %edi
; CHECK-SSE1-NEXT: xorb %al, %dil
; CHECK-SSE1-NEXT: andb 27(%rcx), %dil
; CHECK-SSE1-NEXT: xorb %al, %dil
-; CHECK-SSE1-NEXT: movb 28(%r13), %al
-; CHECK-SSE1-NEXT: movb 28(%rbx), %dl
+; CHECK-SSE1-NEXT: movzbl 28(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 28(%rbx), %edx
; CHECK-SSE1-NEXT: xorb %al, %dl
; CHECK-SSE1-NEXT: andb 28(%rcx), %dl
; CHECK-SSE1-NEXT: xorb %al, %dl
-; CHECK-SSE1-NEXT: movb 29(%r13), %al
-; CHECK-SSE1-NEXT: movb 29(%rbx), %cl
+; CHECK-SSE1-NEXT: movzbl 29(%r13), %eax
+; CHECK-SSE1-NEXT: movzbl 29(%rbx), %ecx
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb 29(%rsi), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
-; CHECK-SSE1-NEXT: movb 30(%r13), %al
+; CHECK-SSE1-NEXT: movzbl 30(%r13), %eax
; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
-; CHECK-SSE1-NEXT: movb 30(%rbx), %al
+; CHECK-SSE1-NEXT: movzbl 30(%rbx), %eax
; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
; CHECK-SSE1-NEXT: andb 30(%rsi), %al
; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK-SSE1-NEXT: movb 31(%r13), %r13b
-; CHECK-SSE1-NEXT: movb 31(%rbx), %bl
+; CHECK-SSE1-NEXT: movzbl 31(%r13), %r13d
+; CHECK-SSE1-NEXT: movzbl 31(%rbx), %ebx
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: andb 31(%rsi), %bl
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: movb %r14b, 21(%r13)
; CHECK-SSE1-NEXT: movb %r15b, 20(%r13)
; CHECK-SSE1-NEXT: movb %r12b, 19(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 18(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 17(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 16(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 15(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 14(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 13(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 12(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 11(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 10(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 9(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 8(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 7(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 6(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 5(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 4(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 3(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 2(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, 1(%r13)
-; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-SSE1-NEXT: movb %al, (%r13)
; CHECK-SSE1-NEXT: movq %r13, %rax
; CHECK-SSE1-NEXT: popq %rbx
define i25 @shift_left_pow_2(i25 %x, i25 %y) {
; X86-LABEL: shift_left_pow_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF
define i16 @shift_right_pow_2(i16 %x, i16 %y) {
; X86-LABEL: shift_right_pow_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $32768, %eax # imm = 0x8000
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: decl %eax
define i8 @and_pow_2(i8 %x, i8 %y) {
; X86-LABEL: and_pow_2:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $4, %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: divb %cl
; SSE2-NEXT: andps %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_urem_vec:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movzwl %dx, %edx
; X86-LABEL: func2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %eax, %edx
; X86-LABEL: func4:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $15, %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shlb $4, %al
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %esi, %edi
; X86-LABEL: func6:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $14, %eax
; X86-NEXT: movl %eax, %edx
; X86-LABEL: func7:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shll %cl, %edx
; X86-LABEL: func8:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: shlb %cl, %dl
; X86-NEXT: movzbl %dl, %esi
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: cmpl %edi, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: cmovnel %ebx, %edx
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll %cl, %ebp
; X86-NEXT: movl %ebp, %edi
; X86-NEXT: shrl %cl, %edi
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %eax
define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
; X86-LABEL: func3:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %eax
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: subb %al, %cl
define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
; X86-NEXT: andb $15, %al
; X86-NEXT: xorl %edx, %edx
define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
; SSE-LABEL: v1i8:
; SSE: # %bb.0:
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: subb (%rsi), %al
; SSE-NEXT: movzbl %al, %eax
;
; AVX-LABEL: v1i8:
; AVX: # %bb.0:
-; AVX-NEXT: movb (%rdi), %al
+; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: xorl %ecx, %ecx
; AVX-NEXT: subb (%rsi), %al
; AVX-NEXT: movzbl %al, %eax
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_setcc_v3i1_v3i16:
define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) {
; SSE-LABEL: load_sext_2i1_to_2i64:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: movb (%rdi), %al
+; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movzbl %al, %ecx
; SSE-NEXT: shrb %al
; SSE-NEXT: movzbl %al, %eax
;
; AVX1-LABEL: load_sext_2i1_to_2i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: movb (%rdi), %al
+; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: movzbl %al, %ecx
; AVX1-NEXT: shrb %al
; AVX1-NEXT: movzbl %al, %eax
;
; AVX2-LABEL: load_sext_2i1_to_2i64:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movb (%rdi), %al
+; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movzbl %al, %ecx
; AVX2-NEXT: shrb %al
; AVX2-NEXT: movzbl %al, %eax
; X86-SSE2-LABEL: load_sext_2i1_to_2i64:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: movzbl (%eax), %eax
; X86-SSE2-NEXT: movzbl %al, %ecx
; X86-SSE2-NEXT: shrb %al
; X86-SSE2-NEXT: movzbl %al, %eax
; X86-SSE41-LABEL: load_sext_2i1_to_2i64:
; X86-SSE41: # %bb.0: # %entry
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movb (%eax), %al
+; X86-SSE41-NEXT: movzbl (%eax), %eax
; X86-SSE41-NEXT: movzbl %al, %ecx
; X86-SSE41-NEXT: andl $1, %ecx
; X86-SSE41-NEXT: negl %ecx
define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) {
; SSE2-LABEL: load_sext_4i1_to_4i32:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movb (%rdi), %al
+; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrb $3, %cl
; SSE2-NEXT: movzbl %cl, %ecx
;
; SSSE3-LABEL: load_sext_4i1_to_4i32:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movb (%rdi), %al
+; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
; SSSE3-NEXT: shrb $3, %cl
; SSSE3-NEXT: movzbl %cl, %ecx
;
; SSE41-LABEL: load_sext_4i1_to_4i32:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movb (%rdi), %al
+; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movzbl %al, %ecx
; SSE41-NEXT: shrb %al
; SSE41-NEXT: movzbl %al, %eax
;
; AVX1-LABEL: load_sext_4i1_to_4i32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: movb (%rdi), %al
+; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: movzbl %al, %ecx
; AVX1-NEXT: shrb %al
; AVX1-NEXT: movzbl %al, %eax
;
; AVX2-LABEL: load_sext_4i1_to_4i32:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movb (%rdi), %al
+; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movzbl %al, %ecx
; AVX2-NEXT: shrb %al
; AVX2-NEXT: movzbl %al, %eax
; X86-SSE2-LABEL: load_sext_4i1_to_4i32:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: movzbl (%eax), %eax
; X86-SSE2-NEXT: movl %eax, %ecx
; X86-SSE2-NEXT: shrb $3, %cl
; X86-SSE2-NEXT: movzbl %cl, %ecx
; X86-SSE41-LABEL: load_sext_4i1_to_4i32:
; X86-SSE41: # %bb.0: # %entry
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movb (%eax), %al
+; X86-SSE41-NEXT: movzbl (%eax), %eax
; X86-SSE41-NEXT: movl %eax, %ecx
; X86-SSE41-NEXT: shrb %cl
; X86-SSE41-NEXT: movzbl %cl, %ecx
define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
; SSE2-LABEL: load_sext_4i1_to_4i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movb (%rdi), %al
+; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrb %cl
; SSE2-NEXT: andb $1, %cl
;
; SSSE3-LABEL: load_sext_4i1_to_4i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movb (%rdi), %al
+; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
; SSSE3-NEXT: shrb %cl
; SSSE3-NEXT: andb $1, %cl
;
; SSE41-LABEL: load_sext_4i1_to_4i64:
; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movb (%rdi), %al
+; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movl %eax, %ecx
; SSE41-NEXT: shrb %cl
; SSE41-NEXT: andb $1, %cl
;
; AVX1-LABEL: load_sext_4i1_to_4i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: movb (%rdi), %al
+; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: movzbl %al, %ecx
; AVX1-NEXT: shrb %al
; AVX1-NEXT: movzbl %al, %eax
;
; AVX2-LABEL: load_sext_4i1_to_4i64:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: movb (%rdi), %al
+; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrb $3, %cl
; AVX2-NEXT: movzbl %cl, %ecx
; X86-SSE2-LABEL: load_sext_4i1_to_4i64:
; X86-SSE2: # %bb.0: # %entry
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: movb (%eax), %al
+; X86-SSE2-NEXT: movzbl (%eax), %eax
; X86-SSE2-NEXT: movl %eax, %ecx
; X86-SSE2-NEXT: shrb %cl
; X86-SSE2-NEXT: andb $1, %cl
; X86-SSE41-LABEL: load_sext_4i1_to_4i64:
; X86-SSE41: # %bb.0: # %entry
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE41-NEXT: movb (%eax), %al
+; X86-SSE41-NEXT: movzbl (%eax), %eax
; X86-SSE41-NEXT: movl %eax, %ecx
; X86-SSE41-NEXT: shrb %cl
; X86-SSE41-NEXT: andb $1, %cl
define dso_local void @copy_7_bytes_volatile(ptr noalias nocapture, ptr noalias nocapture readonly) nounwind #0 {
; CHECK-LABEL: copy_7_bytes_volatile:
; CHECK: # %bb.0:
-; CHECK-NEXT: movb 6(%rsi), %al
+; CHECK-NEXT: movzbl 6(%rsi), %eax
; CHECK-NEXT: movb %al, 6(%rdi)
; CHECK-NEXT: movzwl 4(%rsi), %eax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movzwl 4(%rsi), %ecx
-; CHECK-NEXT: movb 6(%rsi), %dl
+; CHECK-NEXT: movzbl 6(%rsi), %edx
; CHECK-NEXT: movb %dl, 6(%rdi)
; CHECK-NEXT: movw %cx, 4(%rdi)
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movzwl 4(%rsi), %ecx
-; CHECK-NEXT: movb 6(%rsi), %dl
+; CHECK-NEXT: movzbl 6(%rsi), %edx
; CHECK-NEXT: movb %dl, 6(%rdi)
; CHECK-NEXT: movw %cx, 4(%rdi)
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: movq %rdi, %rcx
; CHECK-NEXT: shrq $3, %rcx
-; CHECK-NEXT: movb 2147450880(%rcx), %cl
+; CHECK-NEXT: movzbl 2147450880(%rcx), %ecx
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.1:
; WIN32-LABEL: smuloi8:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
; WIN32-LABEL: umuloi8:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
;
; WIN32-LABEL: smulobri8:
; WIN32: # %bb.0:
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB15_1
; WIN32-NEXT: # %bb.2: # %continue
;
; WIN32-LABEL: umulobri8:
; WIN32: # %bb.0:
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB19_1
; WIN32-NEXT: # %bb.2: # %continue
;
; FAST-LABEL: smuloi8_load:
; FAST: # %bb.0:
-; FAST-NEXT: movb (%rdi), %al
+; FAST-NEXT: movzbl (%rdi), %eax
; FAST-NEXT: imulb %sil
; FAST-NEXT: seto %cl
; FAST-NEXT: movb %al, (%rdx)
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movb (%eax), %al
+; WIN32-NEXT: movzbl (%eax), %eax
; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
; WIN32-LABEL: smuloi8_load2:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: imulb (%ecx)
; WIN32-NEXT: seto %cl
;
; FAST-LABEL: umuloi8_load:
; FAST: # %bb.0:
-; FAST-NEXT: movb (%rdi), %al
+; FAST-NEXT: movzbl (%rdi), %eax
; FAST-NEXT: mulb %sil
; FAST-NEXT: seto %cl
; FAST-NEXT: movb %al, (%rdx)
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movb (%eax), %al
+; WIN32-NEXT: movzbl (%eax), %eax
; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
; WIN32-LABEL: umuloi8_load2:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: mulb (%ecx)
; WIN32-NEXT: seto %cl
define i32 @t(i32 %a, i32 %b) nounwind ssp {
; X86-LABEL: t:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: testb $64, %al
; X86-NEXT: je .LBB0_1
define i1 @xor_not_bools(i1 zeroext %x, i1 zeroext %y) nounwind {
; X86-LABEL: xor_not_bools:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb {{[0-9]+}}(%esp), %al
; X86-NEXT: xorb $1, %al
; X86-NEXT: retl
define i8 @xor_sminval_i8(i8 %x) {
; X86-LABEL: xor_sminval_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb $-128, %al
; X86-NEXT: retl
;
define i8 @xor_notsminval_i8(i8 %x) {
; X86-LABEL: xor_notsminval_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xorb $127, %al
; X86-NEXT: retl
;
define i8 @xor_add_sminval_i8(i8 %x, i8 %y) {
; X86-LABEL: xor_add_sminval_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: addb $-128, %al
; X86-NEXT: retl
define i8 @sub_xor_sminval_i8(i8 %x, i8 %y) {
; X86-LABEL: sub_xor_sminval_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb $-128, %al
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
define i8 @xor_shl_sminval_i8(i8 %x) {
; X86-LABEL: xor_shl_sminval_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addb %al, %al
; X86-NEXT: addb $-128, %al
; X86-NEXT: retl
define i8 @xor_i8_ri(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: xor_i8_ri:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorb $-17, %cl
; X86-NEXT: je .LBB0_2
define i8 @xor_i8_rr(i8 zeroext %0, i8 zeroext %1) {
; X86-LABEL: xor_i8_rr:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorb %al, %cl
; X86-NEXT: je .LBB1_2
; X86-NEXT: # %bb.1:
define i8 @test6(i8 %a, i8 %b) nounwind {
; X86-LABEL: test6:
; X86: # %bb.0: # %entry
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB5_1: # %bb
; X86-NEXT: # =>This Inner Loop Header: Depth=1
define i32 @PR17487(i1 %tobool) {
; X86-LABEL: PR17487:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: notb %cl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb $1, %cl
define i32 @test11(i32 %b) {
; X86-LABEL: test11:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $-2, %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
; X86-LABEL: test3:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb (%eax), %al
+; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: shlb $2, %al
; X86-NEXT: xorb $60, %al
; X86-NEXT: movzbl %al, %eax
;
; X64-LABEL: test3:
; X64: # %bb.0: # %entry
-; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: shlb $2, %al
; X64-NEXT: xorb $60, %al
; X64-NEXT: movzbl %al, %eax