MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
-// This is a temporary debug flag to disable a combine that is known to
-// conflict with another combine.
-static cl::opt<bool>
-NarrowTruncatedBinops("narrow-truncated-binops", cl::Hidden, cl::init(false),
- cl::desc("Move truncates ahead of binops"));
-
namespace {
class DAGCombiner {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
- // Narrow a suitable binary operation with a constant operand by moving it
- // ahead of the truncate. This is limited to pre-legalization because targets
- // may prefer a wider type during later combines and invert this transform.
+ // Narrow a suitable binary operation with a non-opaque constant operand by
+ // moving it ahead of the truncate. This is limited to pre-legalization
+ // because targets may prefer a wider type during later combines and invert
+ // this transform.
switch (N0.getOpcode()) {
// TODO: Add case for ADD - that will likely require a change in logic here
// or target-specific changes to avoid regressions.
case ISD::AND:
case ISD::OR:
case ISD::XOR:
- if (NarrowTruncatedBinops && !LegalOperations && N0.hasOneUse() &&
- (isConstantOrConstantVector(N0.getOperand(0)) ||
- isConstantOrConstantVector(N0.getOperand(1)))) {
+ if (!LegalOperations && N0.hasOneUse() &&
+ (isConstantOrConstantVector(N0.getOperand(0), true) ||
+ isConstantOrConstantVector(N0.getOperand(1), true))) {
// TODO: We already restricted this to pre-legalization, but for vectors
// we are extra cautious to not create an unsupported operation.
// Target-specific changes are likely needed to avoid regressions here.
; GCN: s_cbranch_scc1
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
-; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff
+; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff
; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
-; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f
+; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f
; GCN: BB2_3:
; GCN: buffer_store_short
; SOURCE-SCHED-NEXT: setg %cl
; SOURCE-SCHED-NEXT: movb g_73, %dl
; SOURCE-SCHED-NEXT: xorl %eax, %eax
-; SOURCE-SCHED-NEXT: subl {{[0-9]+}}(%esp), %eax
+; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al
; SOURCE-SCHED-NEXT: testb %dl, %dl
; SOURCE-SCHED-NEXT: jne .LBB0_2
; SOURCE-SCHED-NEXT: # %bb.1: # %bb11
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq {{.*}}(%rip), %rax
-; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: sbbb %al, %al
; CHECK-NEXT: testb $-106, %al
; CHECK-NEXT: jle .LBB0_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-LABEL: PR30841:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: negl %eax
+; CHECK-NEXT: negb %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retl
entry:
; X64-LABEL: sub_zext_cmp_mask_narrower_result:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: andb $1, %al
; X64-NEXT: orb $46, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-LABEL: sub_zext_cmp_mask_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: andb $1, %al
; X32-NEXT: orb $46, %al
-; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
%a = and i32 %x, 1
%c = icmp eq i32 %a, 0
; X64-LABEL: add_zext_cmp_mask_narrower_result:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: andb $1, %al
; X64-NEXT: xorb $43, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-LABEL: add_zext_cmp_mask_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $43, %al
-; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
%a = and i32 %x, 1
%c = icmp eq i32 %a, 0
;
; X32-LABEL: low_bit_select_constants_bigger_false_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andl $1, %eax
; X32-NEXT: orl $36, %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X64-LABEL: low_bit_select_constants_bigger_true_narrower_result:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: andb $1, %al
; X64-NEXT: xorb $41, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-LABEL: low_bit_select_constants_bigger_true_narrower_result:
; X32: # %bb.0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: andb $1, %al
; X32-NEXT: xorb $41, %al
-; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
%a = and i16 %x, 1
%c = icmp eq i16 %a, 0
; X86-NOBMI2-LABEL: clear_lowbits16_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT: movw $16, %cx
-; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-NOBMI2-NEXT: movb $16, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
-; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
; X86-BMI2-LABEL: clear_lowbits16_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movw $16, %cx
-; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-BMI2-NEXT: movb $16, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-LABEL: clear_lowbits16_ic0:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
-; X64-NOBMI2-NEXT: movl $16, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movb $16, %cl
+; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
; X64-BMI2-LABEL: clear_lowbits16_ic0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
-; X64-BMI2-NEXT: movl $16, %ecx
-; X64-BMI2-NEXT: subl %esi, %ecx
+; X64-BMI2-NEXT: movb $16, %cl
+; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movzwl (%eax), %eax
-; X86-NOBMI2-NEXT: movw $16, %cx
-; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-NOBMI2-NEXT: movb $16, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
-; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzwl (%eax), %eax
-; X86-BMI2-NEXT: movw $16, %cx
-; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-BMI2-NEXT: movb $16, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-LABEL: clear_lowbits16_ic2_load:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl (%rdi), %eax
-; X64-NOBMI2-NEXT: movl $16, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movb $16, %cl
+; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
; X64-BMI2-LABEL: clear_lowbits16_ic2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
-; X64-BMI2-NEXT: movl $16, %ecx
-; X64-BMI2-NEXT: subl %esi, %ecx
+; X64-BMI2-NEXT: movb $16, %cl
+; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT: movw $16, %cx
-; X86-NOBMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-NOBMI2-NEXT: movb $16, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
-; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $cx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2-NEXT: retl
; X86-BMI2-LABEL: clear_lowbits16_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movw $16, %cx
-; X86-BMI2-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X86-BMI2-NEXT: movb $16, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movzwl %di, %eax
-; X64-NOBMI2-NEXT: movl $16, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movb $16, %cl
+; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NOBMI2-NEXT: retq
; X64-BMI2-LABEL: clear_lowbits16_ic4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl %di, %eax
-; X64-BMI2-NEXT: movl $16, %ecx
-; X64-BMI2-NEXT: subl %esi, %ecx
+; X64-BMI2-NEXT: movb $16, %cl
+; X64-BMI2-NEXT: subb %sil, %cl
; X64-BMI2-NEXT: shrxl %ecx, %eax, %eax
; X64-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_lowbits32_ic0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: xorl %eax, %eax
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl %edi, %eax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
;
; X64-BMI2-LABEL: clear_lowbits32_ic0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: xorl %ecx, %ecx
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: retl
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
;
; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: xorl %ecx, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: xorl %eax, %eax
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl %edi, %eax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
;
; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic0:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movl $64, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movb $64, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits64_ic0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movl $64, %ecx
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
;
; X64-BMI2-LABEL: clear_lowbits64_ic0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: pushl %esi
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT: movl $64, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movb $64, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movl $64, %ecx
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
;
; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-NOBMI2: # %bb.0:
-; X86-NOBMI2-NEXT: movl $64, %ecx
-; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI2-NEXT: movb $64, %cl
+; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %edx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
;
; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movl $64, %ecx
-; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %cl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax
; X86-BMI2-NEXT: shldl %cl, %edx, %edx
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
-; X64-NOBMI2-NEXT: negl %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
;
; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
; CHECK-NEXT: movsbl {{.*}}(%rip), %edx
; CHECK-NEXT: movzbl %dl, %ecx
; CHECK-NEXT: shrl $7, %ecx
-; CHECK-NEXT: xorl $1, %ecx
+; CHECK-NEXT: xorb $1, %cl
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %edx
; CHECK-NEXT: movb {{.*}}(%rip), %al
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
;
; X86-BMI1BMI2-LABEL: bextr32_c0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %edi
+; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bextr32_c0:
; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %ebx
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl $-1, %ebp
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
; X64-BMI1NOTBM-NEXT: movl %edi, %ebx
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebp
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
; X64-BMI1BMI2-NEXT: pushq %rax
; X64-BMI1BMI2-NEXT: movl %edx, %ebx
; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT: movl %edx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: movl (%eax), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: movl (%eax), %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
;
; X86-BMI1BMI2-LABEL: bextr32_c2_load:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %edi
+; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, (%eax), %esi
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bextr32_c2_load:
; X64-NOBMI-NEXT: movl (%rdi), %ebp
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl $-1, %ebx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebx
; X64-BMI1NOTBM-NEXT: movl (%rdi), %ebp
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx
; X64-BMI1BMI2-NEXT: pushq %rax
; X64-BMI1BMI2-NEXT: movl %edx, %ebx
; X64-BMI1BMI2-NEXT: shrxl %esi, (%rdi), %ebp
-; X64-BMI1BMI2-NEXT: movl %edx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
;
; X86-BMI1BMI2-LABEL: bextr32_c4_commutative:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %edi
+; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %eax
; X86-BMI1BMI2-NEXT: addl $4, %esp
; X86-BMI1BMI2-NEXT: popl %esi
-; X86-BMI1BMI2-NEXT: popl %edi
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bextr32_c4_commutative:
; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %ebx
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl $-1, %ebp
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
; X64-BMI1NOTBM-NEXT: movl %edi, %ebx
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebp
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
; X64-BMI1BMI2-NEXT: pushq %rax
; X64-BMI1BMI2-NEXT: movl %edx, %ebx
; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT: movl %edx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: movl %ebx, %ecx
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: movl %ebx, %ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: subl $16, %esp
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %ebx
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, %ebx, %esi
+; X86-BMI1BMI2-NEXT: bzhil %ebx, %esi, %esi
; X86-BMI1BMI2-NEXT: movl %edi, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X64-NOBMI-NEXT: movl %edi, %ebp
; X64-NOBMI-NEXT: movl %r14d, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl $-1, %ebx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebx
; X64-BMI1NOTBM-NEXT: movl %edi, %ebp
; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebx
; X64-BMI1BMI2-NEXT: movl %edx, %ebx
; X64-BMI1BMI2-NEXT: movl %esi, %ebp
; X64-BMI1BMI2-NEXT: shrxl %esi, %edi, %r14d
-; X64-BMI1BMI2-NEXT: movl %edx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB32_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
; X86-BMI1NOTBM-NEXT: movl %edi, %esi
; X86-BMI1NOTBM-NEXT: xorl %edi, %edi
; X86-BMI1NOTBM-NEXT: .LBB32_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB32_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %ebx
; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
; X64-NOBMI-NEXT: movq %rdi, %r14
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %r14
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: movq %rdi, %r14
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rdx, %rbx
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB34_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
; X86-BMI1NOTBM-NEXT: movl %edi, %esi
; X86-BMI1NOTBM-NEXT: xorl %edi, %edi
; X86-BMI1NOTBM-NEXT: .LBB34_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB34_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %ebx
; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
; X64-NOBMI-NEXT: movq (%rdi), %r14
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %r14
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: movq (%rdi), %r14
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rdx, %rbx
; X64-BMI1BMI2-NEXT: shrxq %rsi, (%rdi), %r14
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB36_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
; X86-BMI1NOTBM-NEXT: movl %edi, %esi
; X86-BMI1NOTBM-NEXT: xorl %edi, %edi
; X86-BMI1NOTBM-NEXT: .LBB36_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB36_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %ebx
; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebx, %ebp
; X86-BMI1BMI2-NEXT: shrdl %cl, %ebx, %ebx
; X64-NOBMI-NEXT: movq %rdi, %r14
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %r14
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: movq %rdi, %r14
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rdx, %rbx
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r14
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: movl %edi, %esi
; X86-NOBMI-NEXT: xorl %edi, %edi
; X86-NOBMI-NEXT: .LBB37_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: movl $-1, %ebp
; X86-NOBMI-NEXT: shrl %cl, %ebp
; X86-BMI1NOTBM-NEXT: movl %edi, %esi
; X86-BMI1NOTBM-NEXT: xorl %edi, %edi
; X86-BMI1NOTBM-NEXT: .LBB37_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: movl $-1, %ebp
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebp
; X86-BMI1BMI2-NEXT: movl %edi, %esi
; X86-BMI1BMI2-NEXT: xorl %edi, %edi
; X86-BMI1BMI2-NEXT: .LBB37_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %ebp
; X86-BMI1BMI2-NEXT: shrxl %ecx, %ebp, %ebx
; X86-BMI1BMI2-NEXT: shrdl %cl, %ebp, %ebp
; X64-NOBMI-NEXT: movq %rdi, %r15
; X64-NOBMI-NEXT: movl %r14d, %ecx
; X64-NOBMI-NEXT: shrq %cl, %r15
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: movq %rdi, %r15
; X64-BMI1NOTBM-NEXT: movl %r14d, %ecx
; X64-BMI1NOTBM-NEXT: shrq %cl, %r15
-; X64-BMI1NOTBM-NEXT: negl %edx
+; X64-BMI1NOTBM-NEXT: negb %dl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: movl %edx, %ecx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rsi, %r14
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rdi, %r15
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
;
; X86-BMI1NOTBM-LABEL: bextr32_d0:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1NOTBM-NEXT: shll $8, %eax
-; X86-BMI1NOTBM-NEXT: orl %ecx, %eax
-; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: orl %eax, %ecx
+; X86-BMI1NOTBM-NEXT: bextrl %ecx, {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bextr32_d0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx
; X86-BMI1BMI2-NEXT: bzhil %eax, %ecx, %eax
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-LABEL: bextr32_d2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shll $8, %ecx
-; X86-BMI1NOTBM-NEXT: orl %edx, %ecx
-; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax
+; X86-BMI1NOTBM-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT: orl %ecx, %edx
+; X86-BMI1NOTBM-NEXT: bextrl %edx, (%eax), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bextr32_d2_load:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-BMI1BMI2-NEXT: shrxl %edx, (%ecx), %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: movl %eax, %ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: shll $8, %ecx
; X86-BMI1NOTBM-NEXT: movzbl %al, %edx
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI1BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT: bzhil %eax, %edx, %esi
; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: shrl %cl, %ebx
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shll %cl, %ebx
; X64-NOBMI-NEXT: shrl %cl, %ebx
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: .LBB43_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shldl %cl, %edi, %eax
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB43_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax
; X86-BMI1NOTBM-NEXT: shll %cl, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB43_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: shrq %cl, %rax
; X86-NOBMI-NEXT: movl %eax, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: .LBB45_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shldl %cl, %edi, %eax
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %eax, %edi
; X86-BMI1NOTBM-NEXT: xorl %eax, %eax
; X86-BMI1NOTBM-NEXT: .LBB45_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %eax
; X86-BMI1NOTBM-NEXT: shll %cl, %edi
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: xorl %esi, %esi
; X86-BMI1BMI2-NEXT: .LBB45_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X64-NOBMI-NEXT: movq (%rdi), %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: shrq %cl, %rax
; X86-NOBMI-NEXT: movl %esi, %ebx
; X86-NOBMI-NEXT: xorl %esi, %esi
; X86-NOBMI-NEXT: .LBB47_2:
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shldl %cl, %ebx, %esi
; X86-NOBMI-NEXT: shll %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: movl %esi, %ebx
; X86-BMI1NOTBM-NEXT: xorl %esi, %esi
; X86-BMI1NOTBM-NEXT: .LBB47_2:
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %ebx
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: movl %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X86-BMI1BMI2-NEXT: .LBB47_2:
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %edx
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edi, %ebx
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X64-NOBMI-NEXT: movq %rdi, %rbx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: shrq %cl, %rbx
-; X64-NOBMI-NEXT: negl %edx
+; X64-NOBMI-NEXT: negb %dl
; X64-NOBMI-NEXT: movl %edx, %ecx
; X64-NOBMI-NEXT: shlq %cl, %rbx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
;
; X86-BMI1BMI2-LABEL: bzhi32_c0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: addl $8, %esp
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c0:
; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %ebx
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movl $-1, %ebp
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
; X64-BMI1NOTBM-NEXT: pushq %rax
; X64-BMI1NOTBM-NEXT: movl %esi, %ecx
; X64-BMI1NOTBM-NEXT: movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebp
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
; X64-BMI1BMI2-NEXT: pushq %rax
; X64-BMI1BMI2-NEXT: movl %esi, %ebx
; X64-BMI1BMI2-NEXT: movl %edi, %ebp
-; X64-BMI1BMI2-NEXT: movl %esi, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %edx
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %edx
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: subl $8, %esp
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT: negl %ecx
+; X86-BMI1BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx
+; X86-BMI1BMI2-NEXT: negb %cl
; X86-BMI1BMI2-NEXT: movl $-1, %eax
; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: pushq %rbx
; X64-BMI1NOTBM-NEXT: movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movl $-1, %eax
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %eax
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: pushq %rbx
; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %ebx
-; X64-BMI1BMI2-NEXT: negl %esi
+; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI1BMI2-NEXT: negb %sil
; X64-BMI1BMI2-NEXT: movl $-1, %eax
; X64-BMI1BMI2-NEXT: shrxl %esi, %eax, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %esi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-BMI1NOTBM-NEXT: shrl %cl, %esi
;
; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: pushl %esi
+; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: subl $8, %esp
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT: movl %esi, %eax
-; X86-BMI1BMI2-NEXT: negl %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
+; X86-BMI1BMI2-NEXT: negb %al
; X86-BMI1BMI2-NEXT: movl $-1, %ecx
; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
; X86-BMI1BMI2-NEXT: movl %eax, (%esp)
; X86-BMI1BMI2-NEXT: calll use32
-; X86-BMI1BMI2-NEXT: bzhil %esi, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: addl $8, %esp
-; X86-BMI1BMI2-NEXT: popl %esi
+; X86-BMI1BMI2-NEXT: popl %ebx
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c4_commutative:
; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %ebx
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movl $-1, %ebp
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %ebp
; X64-BMI1NOTBM-NEXT: pushq %rax
; X64-BMI1NOTBM-NEXT: movl %esi, %ecx
; X64-BMI1NOTBM-NEXT: movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movl $-1, %ebp
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-BMI1NOTBM-NEXT: shrl %cl, %ebp
; X64-BMI1BMI2-NEXT: pushq %rax
; X64-BMI1BMI2-NEXT: movl %esi, %ebx
; X64-BMI1BMI2-NEXT: movl %edi, %ebp
-; X64-BMI1BMI2-NEXT: movl %esi, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: movl %ebx, %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movl $-1, %ecx
; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %edi
; X64-BMI1BMI2-NEXT: callq use32
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: pushl %eax
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %esi
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq %rdi, %r14
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: pushq %rax
; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
; X64-BMI1NOTBM-NEXT: movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rsi, %rbx
; X64-BMI1BMI2-NEXT: movq %rdi, %r14
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
-; X86-NOBMI-NEXT: shrdl %cl, %edx, %edx
+; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB27_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %ebx, %edx
+; X86-NOBMI-NEXT: movl %ebx, %eax
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB27_2:
-; X86-NOBMI-NEXT: movl (%eax), %esi
-; X86-NOBMI-NEXT: andl %edx, %esi
-; X86-NOBMI-NEXT: movl 4(%eax), %edi
+; X86-NOBMI-NEXT: movl (%edx), %esi
+; X86-NOBMI-NEXT: andl %eax, %esi
+; X86-NOBMI-NEXT: movl 4(%edx), %edi
; X86-NOBMI-NEXT: andl %ebx, %edi
; X86-NOBMI-NEXT: subl $8, %esp
; X86-NOBMI-NEXT: pushl %ebx
-; X86-NOBMI-NEXT: pushl %edx
+; X86-NOBMI-NEXT: pushl %eax
; X86-NOBMI-NEXT: calll use64
; X86-NOBMI-NEXT: addl $16, %esp
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-BMI1NOTBM-NEXT: pushl %ebx
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT: movl $-1, %edx
+; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT: movl $-1, %eax
; X86-BMI1NOTBM-NEXT: movl $-1, %ebx
; X86-BMI1NOTBM-NEXT: shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT: shrdl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1NOTBM-NEXT: testb $32, %cl
; X86-BMI1NOTBM-NEXT: je .LBB27_2
; X86-BMI1NOTBM-NEXT: # %bb.1:
-; X86-BMI1NOTBM-NEXT: movl %ebx, %edx
+; X86-BMI1NOTBM-NEXT: movl %ebx, %eax
; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx
; X86-BMI1NOTBM-NEXT: .LBB27_2:
-; X86-BMI1NOTBM-NEXT: movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT: andl %edx, %esi
-; X86-BMI1NOTBM-NEXT: movl 4(%eax), %edi
+; X86-BMI1NOTBM-NEXT: movl (%edx), %esi
+; X86-BMI1NOTBM-NEXT: andl %eax, %esi
+; X86-BMI1NOTBM-NEXT: movl 4(%edx), %edi
; X86-BMI1NOTBM-NEXT: andl %ebx, %edi
; X86-BMI1NOTBM-NEXT: subl $8, %esp
; X86-BMI1NOTBM-NEXT: pushl %ebx
-; X86-BMI1NOTBM-NEXT: pushl %edx
+; X86-BMI1NOTBM-NEXT: pushl %eax
; X86-BMI1NOTBM-NEXT: calll use64
; X86-BMI1NOTBM-NEXT: addl $16, %esp
; X86-BMI1NOTBM-NEXT: movl %esi, %eax
; X86-BMI1BMI2-NEXT: pushl %ebx
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %edx, %ebx
-; X86-BMI1BMI2-NEXT: shrdl %cl, %edx, %edx
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: movl $-1, %eax
+; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %ebx
+; X86-BMI1BMI2-NEXT: shrdl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT: testb $32, %cl
; X86-BMI1BMI2-NEXT: je .LBB27_2
; X86-BMI1BMI2-NEXT: # %bb.1:
-; X86-BMI1BMI2-NEXT: movl %ebx, %edx
+; X86-BMI1BMI2-NEXT: movl %ebx, %eax
; X86-BMI1BMI2-NEXT: xorl %ebx, %ebx
; X86-BMI1BMI2-NEXT: .LBB27_2:
-; X86-BMI1BMI2-NEXT: movl (%eax), %esi
-; X86-BMI1BMI2-NEXT: andl %edx, %esi
-; X86-BMI1BMI2-NEXT: movl 4(%eax), %edi
+; X86-BMI1BMI2-NEXT: movl (%edx), %esi
+; X86-BMI1BMI2-NEXT: andl %eax, %esi
+; X86-BMI1BMI2-NEXT: movl 4(%edx), %edi
; X86-BMI1BMI2-NEXT: andl %ebx, %edi
; X86-BMI1BMI2-NEXT: subl $8, %esp
; X86-BMI1BMI2-NEXT: pushl %ebx
-; X86-BMI1BMI2-NEXT: pushl %edx
+; X86-BMI1BMI2-NEXT: pushl %eax
; X86-BMI1BMI2-NEXT: calll use64
; X86-BMI1BMI2-NEXT: addl $16, %esp
; X86-BMI1BMI2-NEXT: movl %esi, %eax
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movq %rsi, %rcx
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-BMI1NOTBM: # %bb.0:
; X64-BMI1NOTBM-NEXT: pushq %rbx
; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movq $-1, %rax
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rax
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: pushq %rbx
; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx
-; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi killed $rsi def $rsi
-; X64-BMI1BMI2-NEXT: negl %esi
+; X64-BMI1BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI1BMI2-NEXT: negb %sil
; X64-BMI1BMI2-NEXT: movq $-1, %rax
; X64-BMI1BMI2-NEXT: shrxq %rsi, %rax, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shrl %cl, %edi
; X86-BMI1NOTBM-NEXT: pushl %edi
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: pushl %eax
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl $-1, %esi
; X86-BMI1NOTBM-NEXT: movl $-1, %edi
; X86-BMI1NOTBM-NEXT: shrl %cl, %edi
; X86-BMI1BMI2-NEXT: pushl %edi
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: pushl %eax
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: movl $-1, %esi
; X86-BMI1BMI2-NEXT: shrxl %ecx, %esi, %edi
; X86-BMI1BMI2-NEXT: shrdl %cl, %esi, %esi
; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq %rdi, %r14
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movq $-1, %rbx
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rbx
; X64-BMI1NOTBM-NEXT: pushq %rax
; X64-BMI1NOTBM-NEXT: movq %rsi, %rcx
; X64-BMI1NOTBM-NEXT: movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT: negl %ecx
+; X64-BMI1NOTBM-NEXT: negb %cl
; X64-BMI1NOTBM-NEXT: movq $-1, %rbx
; X64-BMI1NOTBM-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1NOTBM-NEXT: shrq %cl, %rbx
; X64-BMI1BMI2-NEXT: movq %rsi, %rbx
; X64-BMI1BMI2-NEXT: movq %rdi, %r14
; X64-BMI1BMI2-NEXT: movl %ebx, %eax
-; X64-BMI1BMI2-NEXT: negl %eax
+; X64-BMI1BMI2-NEXT: negb %al
; X64-BMI1BMI2-NEXT: movq $-1, %rcx
; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rdi
; X64-BMI1BMI2-NEXT: callq use64
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
;
; X86-BMI1NOTBM-LABEL: bzhi32_d0:
; X86-BMI1NOTBM: # %bb.0:
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1NOTBM-NEXT: shll $8, %eax
; X86-BMI1NOTBM-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: retl
;
; X86-BMI1BMI2-LABEL: bzhi32_d0:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %eax
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-BMI1NOTBM-LABEL: bzhi32_d2_load:
; X86-BMI1NOTBM: # %bb.0:
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: shll $8, %ecx
; X86-BMI1NOTBM-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1NOTBM-NEXT: retl
; X86-BMI1BMI2-LABEL: bzhi32_d2_load:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT: bzhil %eax, (%ecx), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: pushl %esi
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: pushl %esi
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %eax, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq %rdi, %rax
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %edx
; X86-NOBMI-NEXT: movl 4(%eax), %eax
-; X86-NOBMI-NEXT: movl $64, %ecx
-; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movb $64, %cl
+; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: movl %edx, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: shldl %cl, %edx, %eax
; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1NOTBM-NEXT: movl (%eax), %edx
; X86-BMI1NOTBM-NEXT: movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT: movl $64, %ecx
-; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT: movb $64, %cl
+; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1NOTBM-NEXT: movl %edx, %esi
; X86-BMI1NOTBM-NEXT: shll %cl, %esi
; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %eax
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movl (%eax), %edx
; X86-BMI1BMI2-NEXT: movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT: movl $64, %ecx
-; X86-BMI1BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT: movb $64, %cl
+; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT: shldl %cl, %edx, %esi
; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %edi
; X86-BMI1BMI2-NEXT: xorl %edx, %edx
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
-; X64-NOBMI-NEXT: negl %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X86-FAST-NEXT: movl %edi, %ebp
; X86-FAST-NEXT: xorl %edi, %edi
; X86-FAST-NEXT: .LBB3_2:
-; X86-FAST-NEXT: movl $64, %ecx
-; X86-FAST-NEXT: subl %ebx, %ecx
+; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: subb %bl, %cl
; X86-FAST-NEXT: movl %edx, %esi
; X86-FAST-NEXT: shrl %cl, %esi
; X86-FAST-NEXT: shrdl %cl, %edx, (%esp) # 4-byte Folded Spill
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: andl $63, %ebx
-; X86-SLOW-NEXT: movl $64, %ecx
-; X86-SLOW-NEXT: subl %ebx, %ecx
+; X86-SLOW-NEXT: movb $64, %dh
+; X86-SLOW-NEXT: subb %bl, %dh
; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movb %dh, %cl
; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %cl, %ch
-; X86-SLOW-NEXT: andb $31, %ch
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movb %dh, %dl
+; X86-SLOW-NEXT: andb $31, %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %edi, %esi
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: testb %dl, %dl
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: je .LBB3_2
; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: orl %eax, %ebp
+; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB3_2:
-; X86-SLOW-NEXT: movl %edx, %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl %ebp, %eax
; X86-SLOW-NEXT: movl %ebx, %ecx
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: movb %bl, %ch
; X86-SLOW-NEXT: andb $31, %ch
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %edx, %esi
-; X86-SLOW-NEXT: movl %ebp, %edx
-; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb %ch, %ch
; X86-SLOW-NEXT: je .LBB3_4
; X86-SLOW-NEXT: # %bb.3:
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: movl %eax, %esi
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: movl %eax, %ebp
; X86-SLOW-NEXT: .LBB3_4:
-; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edi
; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
+; X86-SLOW-NEXT: shll %cl, %edi
; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB3_6
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: xorl %ebp, %ebp
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: xorl %edi, %edi
; X86-SLOW-NEXT: .LBB3_6:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: movb %dh, %cl
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: testb $32, %dh
; X86-SLOW-NEXT: jne .LBB3_7
; X86-SLOW-NEXT: # %bb.8:
; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-SLOW-NEXT: jne .LBB3_10
; X86-SLOW-NEXT: jmp .LBB3_11
; X86-SLOW-NEXT: .LBB3_7:
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: movl %esi, %ecx
+; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: je .LBB3_11
; X86-SLOW-NEXT: .LBB3_10:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: orl %ecx, %ebp
-; X86-SLOW-NEXT: movl %esi, %edx
-; X86-SLOW-NEXT: movl %ebp, %eax
+; X86-SLOW-NEXT: orl %esi, %ebp
+; X86-SLOW-NEXT: orl %ecx, %edi
+; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edi, %eax
; X86-SLOW-NEXT: .LBB3_11:
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-SLOW-NEXT: addl $8, %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-FAST-NEXT: pushl %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT: andl $63, %ebx
-; X86-FAST-NEXT: movl $64, %ecx
-; X86-FAST-NEXT: subl %ebx, %ecx
+; X86-FAST-NEXT: movb $64, %cl
+; X86-FAST-NEXT: subb %bl, %cl
; X86-FAST-NEXT: movl %eax, %edi
; X86-FAST-NEXT: shll %cl, %edi
; X86-FAST-NEXT: shldl %cl, %eax, %esi
; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: je .LBB3_2
; X86-FAST-NEXT: # %bb.1:
; X86-FAST-NEXT: movl %edi, %esi
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: subl $8, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: andl $63, %ebx
-; X86-SLOW-NEXT: movl $64, %eax
-; X86-SLOW-NEXT: subl %ebx, %eax
+; X86-SLOW-NEXT: movb $64, %al
+; X86-SLOW-NEXT: subb %bl, %al
; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl %eax, %ecx
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: andb $31, %ch
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: je .LBB3_2
; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: orl %ebp, %edx
+; X86-SLOW-NEXT: orl %edi, %edx
; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: .LBB3_2:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT: movl %ecx, %edx
; X86-SLOW-NEXT: movl %ebx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: movb %bl, %ch
-; X86-SLOW-NEXT: andb $31, %ch
-; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: movb %bl, %ah
+; X86-SLOW-NEXT: andb $31, %ah
+; X86-SLOW-NEXT: movb %ah, %cl
; X86-SLOW-NEXT: negb %cl
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: testb %ch, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl %ebp, %edi
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: testb %ah, %ah
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT: je .LBB3_4
; X86-SLOW-NEXT: # %bb.3:
-; X86-SLOW-NEXT: orl %edx, %ebp
-; X86-SLOW-NEXT: movl %ebp, %esi
+; X86-SLOW-NEXT: orl %edx, %edi
+; X86-SLOW-NEXT: movl %edi, %ebp
; X86-SLOW-NEXT: .LBB3_4:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB3_6
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: xorl %ebp, %ebp
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: xorl %edi, %edi
; X86-SLOW-NEXT: .LBB3_6:
; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: shll %cl, %esi
; X86-SLOW-NEXT: testb $32, %al
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: jne .LBB3_7
; X86-SLOW-NEXT: jne .LBB3_10
; X86-SLOW-NEXT: jmp .LBB3_11
; X86-SLOW-NEXT: .LBB3_7:
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: xorl %edi, %edi
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: testl %ebx, %ebx
; X86-SLOW-NEXT: je .LBB3_11
; X86-SLOW-NEXT: .LBB3_10:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: orl %ebp, %esi
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %eax, %edx
; X86-SLOW-NEXT: .LBB3_11:
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: andl $63, %ecx
; X32-SSE2-NEXT: movl %edx, %edi
; X32-SSE2-NEXT: shrl %cl, %edi
-; X32-SSE2-NEXT: movl %esi, %ebp
-; X32-SSE2-NEXT: shrdl %cl, %edx, %ebp
-; X32-SSE2-NEXT: xorl %eax, %eax
+; X32-SSE2-NEXT: movl %esi, %ebx
+; X32-SSE2-NEXT: shrdl %cl, %edx, %ebx
+; X32-SSE2-NEXT: xorl %ebp, %ebp
; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %edi, %ebp
-; X32-SSE2-NEXT: cmovnel %eax, %edi
-; X32-SSE2-NEXT: negl %ebx
-; X32-SSE2-NEXT: andl $63, %ebx
+; X32-SSE2-NEXT: cmovnel %edi, %ebx
+; X32-SSE2-NEXT: cmovnel %ebp, %edi
+; X32-SSE2-NEXT: negb %cl
; X32-SSE2-NEXT: movl %esi, %eax
-; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: shldl %cl, %esi, %edx
-; X32-SSE2-NEXT: testb $32, %bl
+; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: movl $0, %ecx
-; X32-SSE2-NEXT: cmovnel %ecx, %eax
-; X32-SSE2-NEXT: orl %ebp, %eax
+; X32-SSE2-NEXT: cmovnel %ebp, %eax
+; X32-SSE2-NEXT: orl %ebx, %eax
; X32-SSE2-NEXT: orl %edi, %edx
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: cmovnel %ebp, %eax
; X32-SSE2-NEXT: cmovnel %ecx, %ebp
; X32-SSE2-NEXT: xorl %edx, %edx
-; X32-SSE2-NEXT: movl $37, %ecx
-; X32-SSE2-NEXT: subl %ebx, %ecx
+; X32-SSE2-NEXT: movb $37, %cl
+; X32-SSE2-NEXT: subb %bl, %cl
; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
; X32-SSE2-NEXT: shrl %cl, %esi
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
; X32-SSE2-NEXT: movl %eax, %ebx
-; X32-SSE2-NEXT: movl $37, %ecx
-; X32-SSE2-NEXT: subl %eax, %ecx
+; X32-SSE2-NEXT: movb $37, %cl
+; X32-SSE2-NEXT: subb %bl, %cl
; X32-SSE2-NEXT: movl %ebp, %eax
; X32-SSE2-NEXT: shll %cl, %ebp
; X32-SSE2-NEXT: shldl %cl, %eax, %edi
; 686-NEXT: movzbl c, %eax
; 686-NEXT: xorl %ecx, %ecx
; 686-NEXT: testl %eax, %eax
-; 686-NEXT: setne %cl
-; 686-NEXT: testb %al, %al
; 686-NEXT: setne {{[0-9]+}}(%esp)
+; 686-NEXT: setne %cl
; 686-NEXT: xorl %edx, %edx
; 686-NEXT: cmpl %eax, %ecx
; 686-NEXT: setle %dl
; CHECK: ## %bb.0:
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
-; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax
-; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shldw %cl, %ax, %ax
+; X64-NEXT: rolw %cl, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%t0 = shl i16 %x, %z
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrdw %cl, %ax, %ax
+; X64-NEXT: rorw %cl, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%t0 = lshr i16 %x, %z
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: testb $32, %cl
-; X86-NEXT: movl $0, %ebx
+; X86-NEXT: movl $0, %edi
; X86-NEXT: jne .LBB28_2
; X86-NEXT: # %bb.1: # %entry
-; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: .LBB28_2: # %entry
-; X86-NEXT: movl $64, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movb $64, %dl
+; X86-NEXT: subb %cl, %dl
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: shrdl %cl, %edi, %esi
+; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: testb $32, %dl
; X86-NEXT: jne .LBB28_4
; X86-NEXT: # %bb.3: # %entry
; X86-NEXT: movl %esi, %eax
; X86-NEXT: .LBB28_4: # %entry
-; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: orl %edi, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: movl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB2_2:
-; X86-NEXT: negl %ecx
+; X86-NEXT: negb %cl
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shrl %cl, %ebx
; X86-NEXT: shrdl %cl, %edi, %esi
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB3_2:
-; X86-NEXT: negl %ecx
+; X86-NEXT: negb %cl
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl 4(%eax), %ebx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: xorl %esi, %esi
; X86-NEXT: .LBB6_2:
-; X86-NEXT: negl %ecx
+; X86-NEXT: negb %cl
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shrl %cl, %ebp
; X86-NEXT: shrdl %cl, %ebx, %edx
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ebx
-; X86-NEXT: movl 4(%eax), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: shrl %cl, %esi
+; X86-NEXT: movl 4(%eax), %esi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: shrdl %cl, %edx, %edi
+; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB7_2:
-; X86-NEXT: negl %ecx
+; X86-NEXT: negb %cl
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shll %cl, %ebp
-; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: shldl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_4
; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB7_4:
-; X86-NEXT: orl %edx, %esi
+; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebp, %edi
-; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
; BDVER12-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negl %ecx # sched: [1:0.50]
+; BDVER12-NEXT: negb %cl # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrq %cl, %rax # sched: [1:0.50]
; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: negb %cl # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negl %ecx # sched: [1:0.50]
+; BDVER12-NEXT: negb %cl # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: negb %cl # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: negl %ecx # sched: [1:0.50]
+; BDVER12-NEXT: negb %cl # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: negb %cl # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
-; ILP-NEXT: leal 3(%rsi,%rsi), %r11d
+; ILP-NEXT: incl %esi
+; ILP-NEXT: addb %sil, %sil
+; ILP-NEXT: orb $1, %sil
; ILP-NEXT: movl $1, %r9d
; ILP-NEXT: xorl %r14d, %r14d
-; ILP-NEXT: movl %r11d, %ecx
+; ILP-NEXT: movl %esi, %ecx
; ILP-NEXT: shldq %cl, %r9, %r14
+; ILP-NEXT: movl $1, %edx
+; ILP-NEXT: shlq %cl, %rdx
+; ILP-NEXT: movl %esi, %r11d
+; ILP-NEXT: addb $-128, %r11b
+; ILP-NEXT: movb $-128, %r10b
+; ILP-NEXT: xorl %ebx, %ebx
+; ILP-NEXT: movl %r11d, %ecx
+; ILP-NEXT: shldq %cl, %r9, %rbx
+; ILP-NEXT: testb $64, %sil
+; ILP-NEXT: cmovneq %rdx, %r14
+; ILP-NEXT: cmovneq %r8, %rdx
; ILP-NEXT: movl $1, %edi
; ILP-NEXT: shlq %cl, %rdi
-; ILP-NEXT: movb $-128, %r10b
-; ILP-NEXT: subb %r11b, %r10b
-; ILP-NEXT: movl %r11d, %edx
-; ILP-NEXT: addb $-128, %dl
-; ILP-NEXT: xorl %esi, %esi
-; ILP-NEXT: movl %edx, %ecx
-; ILP-NEXT: shldq %cl, %r9, %rsi
-; ILP-NEXT: movl $1, %ebx
-; ILP-NEXT: shlq %cl, %rbx
+; ILP-NEXT: subb %sil, %r10b
; ILP-NEXT: movl %r10d, %ecx
; ILP-NEXT: shrdq %cl, %r8, %r9
-; ILP-NEXT: testb $64, %r11b
-; ILP-NEXT: cmovneq %rdi, %r14
-; ILP-NEXT: cmovneq %r8, %rdi
; ILP-NEXT: testb $64, %r10b
; ILP-NEXT: cmovneq %r8, %r9
-; ILP-NEXT: testb $64, %dl
-; ILP-NEXT: cmovneq %rbx, %rsi
-; ILP-NEXT: cmovneq %r8, %rbx
-; ILP-NEXT: testb %r11b, %r11b
+; ILP-NEXT: testb $64, %r11b
+; ILP-NEXT: cmovneq %rdi, %rbx
+; ILP-NEXT: cmovneq %r8, %rdi
+; ILP-NEXT: testb %sil, %sil
; ILP-NEXT: cmovsq %r8, %r14
-; ILP-NEXT: cmovsq %r8, %rdi
+; ILP-NEXT: cmovsq %r8, %rdx
; ILP-NEXT: movq %r14, 8(%rax)
-; ILP-NEXT: movq %rdi, (%rax)
-; ILP-NEXT: cmovnsq %r8, %rsi
-; ILP-NEXT: cmoveq %r8, %rsi
-; ILP-NEXT: movq %rsi, 24(%rax)
-; ILP-NEXT: cmovnsq %r9, %rbx
+; ILP-NEXT: movq %rdx, (%rax)
+; ILP-NEXT: cmovnsq %r8, %rbx
; ILP-NEXT: cmoveq %r8, %rbx
-; ILP-NEXT: movq %rbx, 16(%rax)
+; ILP-NEXT: movq %rbx, 24(%rax)
+; ILP-NEXT: cmovnsq %r9, %rdi
+; ILP-NEXT: cmoveq %r8, %rdi
+; ILP-NEXT: movq %rdi, 16(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r14
; ILP-NEXT: retq
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: movq %rdi, %rax
-; HYBRID-NEXT: leal 3(%rsi,%rsi), %r10d
+; HYBRID-NEXT: incl %esi
+; HYBRID-NEXT: addb %sil, %sil
+; HYBRID-NEXT: orb $1, %sil
; HYBRID-NEXT: movb $-128, %cl
-; HYBRID-NEXT: subb %r10b, %cl
+; HYBRID-NEXT: subb %sil, %cl
; HYBRID-NEXT: xorl %r8d, %r8d
-; HYBRID-NEXT: movl $1, %esi
+; HYBRID-NEXT: movl $1, %r11d
; HYBRID-NEXT: movl $1, %r9d
; HYBRID-NEXT: shrdq %cl, %r8, %r9
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r8, %r9
-; HYBRID-NEXT: xorl %r11d, %r11d
-; HYBRID-NEXT: movl %r10d, %ecx
-; HYBRID-NEXT: shldq %cl, %rsi, %r11
+; HYBRID-NEXT: xorl %r10d, %r10d
+; HYBRID-NEXT: movl %esi, %ecx
+; HYBRID-NEXT: shldq %cl, %r11, %r10
; HYBRID-NEXT: addb $-128, %cl
-; HYBRID-NEXT: xorl %edx, %edx
-; HYBRID-NEXT: shldq %cl, %rsi, %rdx
-; HYBRID-NEXT: movl $1, %edi
-; HYBRID-NEXT: shlq %cl, %rdi
+; HYBRID-NEXT: xorl %edi, %edi
+; HYBRID-NEXT: shldq %cl, %r11, %rdi
+; HYBRID-NEXT: movl $1, %edx
+; HYBRID-NEXT: shlq %cl, %rdx
; HYBRID-NEXT: testb $64, %cl
-; HYBRID-NEXT: cmovneq %rdi, %rdx
-; HYBRID-NEXT: cmovneq %r8, %rdi
-; HYBRID-NEXT: movl %r10d, %ecx
-; HYBRID-NEXT: shlq %cl, %rsi
-; HYBRID-NEXT: testb $64, %r10b
-; HYBRID-NEXT: cmovneq %rsi, %r11
-; HYBRID-NEXT: cmovneq %r8, %rsi
-; HYBRID-NEXT: testb %r10b, %r10b
+; HYBRID-NEXT: cmovneq %rdx, %rdi
+; HYBRID-NEXT: cmovneq %r8, %rdx
+; HYBRID-NEXT: movl %esi, %ecx
+; HYBRID-NEXT: shlq %cl, %r11
+; HYBRID-NEXT: testb $64, %sil
+; HYBRID-NEXT: cmovneq %r11, %r10
+; HYBRID-NEXT: cmovneq %r8, %r11
+; HYBRID-NEXT: testb %sil, %sil
+; HYBRID-NEXT: cmovsq %r8, %r10
+; HYBRID-NEXT: movq %r10, 8(%rax)
; HYBRID-NEXT: cmovsq %r8, %r11
-; HYBRID-NEXT: movq %r11, 8(%rax)
-; HYBRID-NEXT: cmovsq %r8, %rsi
-; HYBRID-NEXT: movq %rsi, (%rax)
-; HYBRID-NEXT: cmovnsq %r8, %rdx
-; HYBRID-NEXT: cmoveq %r8, %rdx
-; HYBRID-NEXT: movq %rdx, 24(%rax)
-; HYBRID-NEXT: cmovnsq %r9, %rdi
+; HYBRID-NEXT: movq %r11, (%rax)
+; HYBRID-NEXT: cmovnsq %r8, %rdi
; HYBRID-NEXT: cmoveq %r8, %rdi
-; HYBRID-NEXT: movq %rdi, 16(%rax)
+; HYBRID-NEXT: movq %rdi, 24(%rax)
+; HYBRID-NEXT: cmovnsq %r9, %rdx
+; HYBRID-NEXT: cmoveq %r8, %rdx
+; HYBRID-NEXT: movq %rdx, 16(%rax)
; HYBRID-NEXT: retq
;
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: movq %rdi, %rax
-; BURR-NEXT: leal 3(%rsi,%rsi), %r10d
+; BURR-NEXT: incl %esi
+; BURR-NEXT: addb %sil, %sil
+; BURR-NEXT: orb $1, %sil
; BURR-NEXT: movb $-128, %cl
-; BURR-NEXT: subb %r10b, %cl
+; BURR-NEXT: subb %sil, %cl
; BURR-NEXT: xorl %r8d, %r8d
-; BURR-NEXT: movl $1, %esi
+; BURR-NEXT: movl $1, %r11d
; BURR-NEXT: movl $1, %r9d
; BURR-NEXT: shrdq %cl, %r8, %r9
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r8, %r9
-; BURR-NEXT: xorl %r11d, %r11d
-; BURR-NEXT: movl %r10d, %ecx
-; BURR-NEXT: shldq %cl, %rsi, %r11
+; BURR-NEXT: xorl %r10d, %r10d
+; BURR-NEXT: movl %esi, %ecx
+; BURR-NEXT: shldq %cl, %r11, %r10
; BURR-NEXT: addb $-128, %cl
-; BURR-NEXT: xorl %edx, %edx
-; BURR-NEXT: shldq %cl, %rsi, %rdx
-; BURR-NEXT: movl $1, %edi
-; BURR-NEXT: shlq %cl, %rdi
+; BURR-NEXT: xorl %edi, %edi
+; BURR-NEXT: shldq %cl, %r11, %rdi
+; BURR-NEXT: movl $1, %edx
+; BURR-NEXT: shlq %cl, %rdx
; BURR-NEXT: testb $64, %cl
-; BURR-NEXT: cmovneq %rdi, %rdx
-; BURR-NEXT: cmovneq %r8, %rdi
-; BURR-NEXT: movl %r10d, %ecx
-; BURR-NEXT: shlq %cl, %rsi
-; BURR-NEXT: testb $64, %r10b
-; BURR-NEXT: cmovneq %rsi, %r11
-; BURR-NEXT: cmovneq %r8, %rsi
-; BURR-NEXT: testb %r10b, %r10b
+; BURR-NEXT: cmovneq %rdx, %rdi
+; BURR-NEXT: cmovneq %r8, %rdx
+; BURR-NEXT: movl %esi, %ecx
+; BURR-NEXT: shlq %cl, %r11
+; BURR-NEXT: testb $64, %sil
+; BURR-NEXT: cmovneq %r11, %r10
+; BURR-NEXT: cmovneq %r8, %r11
+; BURR-NEXT: testb %sil, %sil
+; BURR-NEXT: cmovsq %r8, %r10
+; BURR-NEXT: movq %r10, 8(%rax)
; BURR-NEXT: cmovsq %r8, %r11
-; BURR-NEXT: movq %r11, 8(%rax)
-; BURR-NEXT: cmovsq %r8, %rsi
-; BURR-NEXT: movq %rsi, (%rax)
-; BURR-NEXT: cmovnsq %r8, %rdx
-; BURR-NEXT: cmoveq %r8, %rdx
-; BURR-NEXT: movq %rdx, 24(%rax)
-; BURR-NEXT: cmovnsq %r9, %rdi
+; BURR-NEXT: movq %r11, (%rax)
+; BURR-NEXT: cmovnsq %r8, %rdi
; BURR-NEXT: cmoveq %r8, %rdi
-; BURR-NEXT: movq %rdi, 16(%rax)
+; BURR-NEXT: movq %rdi, 24(%rax)
+; BURR-NEXT: cmovnsq %r9, %rdx
+; BURR-NEXT: cmoveq %r8, %rdx
+; BURR-NEXT: movq %rdx, 16(%rax)
; BURR-NEXT: retq
;
; SRC-LABEL: test1:
; SRC: # %bb.0:
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: leal 3(%rsi,%rsi), %r9d
+; SRC-NEXT: incl %esi
+; SRC-NEXT: addb %sil, %sil
+; SRC-NEXT: orb $1, %sil
; SRC-NEXT: movb $-128, %cl
-; SRC-NEXT: subb %r9b, %cl
+; SRC-NEXT: subb %sil, %cl
; SRC-NEXT: xorl %r8d, %r8d
; SRC-NEXT: movl $1, %edi
; SRC-NEXT: movl $1, %r10d
; SRC-NEXT: shrdq %cl, %r8, %r10
; SRC-NEXT: testb $64, %cl
; SRC-NEXT: cmovneq %r8, %r10
-; SRC-NEXT: movl %r9d, %r11d
-; SRC-NEXT: addb $-128, %r11b
-; SRC-NEXT: xorl %esi, %esi
-; SRC-NEXT: movl %r11d, %ecx
-; SRC-NEXT: shldq %cl, %rdi, %rsi
+; SRC-NEXT: movl %esi, %r9d
+; SRC-NEXT: addb $-128, %r9b
; SRC-NEXT: xorl %edx, %edx
; SRC-NEXT: movl %r9d, %ecx
; SRC-NEXT: shldq %cl, %rdi, %rdx
+; SRC-NEXT: xorl %r11d, %r11d
+; SRC-NEXT: movl %esi, %ecx
+; SRC-NEXT: shldq %cl, %rdi, %r11
; SRC-NEXT: movl $1, %ebx
; SRC-NEXT: shlq %cl, %rbx
-; SRC-NEXT: testb $64, %r9b
-; SRC-NEXT: cmovneq %rbx, %rdx
+; SRC-NEXT: testb $64, %sil
+; SRC-NEXT: cmovneq %rbx, %r11
; SRC-NEXT: cmovneq %r8, %rbx
-; SRC-NEXT: movl %r11d, %ecx
+; SRC-NEXT: movl %r9d, %ecx
; SRC-NEXT: shlq %cl, %rdi
-; SRC-NEXT: testb $64, %r11b
-; SRC-NEXT: cmovneq %rdi, %rsi
+; SRC-NEXT: testb $64, %r9b
+; SRC-NEXT: cmovneq %rdi, %rdx
; SRC-NEXT: cmovneq %r8, %rdi
-; SRC-NEXT: testb %r9b, %r9b
+; SRC-NEXT: testb %sil, %sil
; SRC-NEXT: cmovnsq %r10, %rdi
; SRC-NEXT: cmoveq %r8, %rdi
-; SRC-NEXT: cmovnsq %r8, %rsi
-; SRC-NEXT: cmoveq %r8, %rsi
-; SRC-NEXT: cmovsq %r8, %rdx
+; SRC-NEXT: cmovnsq %r8, %rdx
+; SRC-NEXT: cmoveq %r8, %rdx
+; SRC-NEXT: cmovsq %r8, %r11
; SRC-NEXT: cmovsq %r8, %rbx
-; SRC-NEXT: movq %rdx, 8(%rax)
+; SRC-NEXT: movq %r11, 8(%rax)
; SRC-NEXT: movq %rbx, (%rax)
-; SRC-NEXT: movq %rsi, 24(%rax)
+; SRC-NEXT: movq %rdx, 24(%rax)
; SRC-NEXT: movq %rdi, 16(%rax)
; SRC-NEXT: popq %rbx
; SRC-NEXT: retq
; LIN-NEXT: movq %rdi, %rax
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
-; LIN-NEXT: leal 3(%rsi,%rsi), %edx
-; LIN-NEXT: movl $1, %esi
-; LIN-NEXT: movl %edx, %ecx
-; LIN-NEXT: shlq %cl, %rsi
-; LIN-NEXT: testb $64, %dl
-; LIN-NEXT: movq %rsi, %rcx
+; LIN-NEXT: incl %esi
+; LIN-NEXT: addb %sil, %sil
+; LIN-NEXT: orb $1, %sil
+; LIN-NEXT: movl $1, %edx
+; LIN-NEXT: movl %esi, %ecx
+; LIN-NEXT: shlq %cl, %rdx
+; LIN-NEXT: testb $64, %sil
+; LIN-NEXT: movq %rdx, %rcx
; LIN-NEXT: cmovneq %r9, %rcx
-; LIN-NEXT: testb %dl, %dl
+; LIN-NEXT: testb %sil, %sil
; LIN-NEXT: cmovsq %r9, %rcx
; LIN-NEXT: movq %rcx, (%rdi)
; LIN-NEXT: xorl %edi, %edi
-; LIN-NEXT: movl %edx, %ecx
+; LIN-NEXT: movl %esi, %ecx
; LIN-NEXT: shldq %cl, %r8, %rdi
-; LIN-NEXT: cmovneq %rsi, %rdi
+; LIN-NEXT: cmovneq %rdx, %rdi
; LIN-NEXT: cmovsq %r9, %rdi
; LIN-NEXT: movq %rdi, 8(%rax)
-; LIN-NEXT: movl %edx, %esi
-; LIN-NEXT: addb $-128, %sil
+; LIN-NEXT: movl %esi, %edx
+; LIN-NEXT: addb $-128, %dl
; LIN-NEXT: movl $1, %r10d
-; LIN-NEXT: movl %esi, %ecx
+; LIN-NEXT: movl %edx, %ecx
; LIN-NEXT: shlq %cl, %r10
-; LIN-NEXT: testb $64, %sil
+; LIN-NEXT: testb $64, %dl
; LIN-NEXT: movq %r10, %rdi
; LIN-NEXT: cmovneq %r9, %rdi
; LIN-NEXT: movb $-128, %cl
-; LIN-NEXT: subb %dl, %cl
-; LIN-NEXT: movl $1, %edx
-; LIN-NEXT: shrdq %cl, %r9, %rdx
+; LIN-NEXT: subb %sil, %cl
+; LIN-NEXT: movl $1, %esi
+; LIN-NEXT: shrdq %cl, %r9, %rsi
; LIN-NEXT: testb $64, %cl
-; LIN-NEXT: cmovneq %r9, %rdx
-; LIN-NEXT: cmovsq %rdi, %rdx
-; LIN-NEXT: cmoveq %r9, %rdx
-; LIN-NEXT: movq %rdx, 16(%rax)
-; LIN-NEXT: xorl %edx, %edx
-; LIN-NEXT: movl %esi, %ecx
-; LIN-NEXT: shldq %cl, %r8, %rdx
-; LIN-NEXT: cmovneq %r10, %rdx
-; LIN-NEXT: cmovnsq %r9, %rdx
-; LIN-NEXT: cmoveq %r9, %rdx
-; LIN-NEXT: movq %rdx, 24(%rax)
+; LIN-NEXT: cmovneq %r9, %rsi
+; LIN-NEXT: cmovsq %rdi, %rsi
+; LIN-NEXT: cmoveq %r9, %rsi
+; LIN-NEXT: movq %rsi, 16(%rax)
+; LIN-NEXT: xorl %esi, %esi
+; LIN-NEXT: movl %edx, %ecx
+; LIN-NEXT: shldq %cl, %r8, %rsi
+; LIN-NEXT: cmovneq %r10, %rsi
+; LIN-NEXT: cmovnsq %r9, %rsi
+; LIN-NEXT: cmoveq %r9, %rsi
+; LIN-NEXT: movq %rsi, 24(%rax)
; LIN-NEXT: retq
%b = add i256 %a, 1
%m = shl i256 %b, 1
; CHECK-WIN32-64-LABEL: and16_trunc_8_sign:
; CHECK-WIN32-64: # %bb.0:
; CHECK-WIN32-64-NEXT: subq $40, %rsp
-; CHECK-WIN32-64-NEXT: # kill: def $cx killed $cx def $ecx
; CHECK-WIN32-64-NEXT: testb $-128, %cl
; CHECK-WIN32-64-NEXT: jg .LBB13_2
; CHECK-WIN32-64-NEXT: # %bb.1: # %yes
;
; CHECK-X86-LABEL: and16_trunc_8_sign:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT: testb $-128, %al
+; CHECK-X86-NEXT: testb $-128, {{[0-9]+}}(%esp)
; CHECK-X86-NEXT: jg .LBB13_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
;
; CHECK-X86-LABEL: and32_trunc_16_sign:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT: andl $32768, %eax # imm = 0x8000
; CHECK-X86-NEXT: testw %ax, %ax
; CHECK-X86-NEXT: jg .LBB16_2
; CHECK-X86-NEXT: # %bb.1: # %yes
;
; CHECK-X86-LABEL: and32_trunc_16_sign_minsize:
; CHECK-X86: # %bb.0:
-; CHECK-X86-NEXT: testw $-32768, {{[0-9]+}}(%esp) # imm = 0x8000
+; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT: testw $-32768, %ax # imm = 0x8000
; CHECK-X86-NEXT: jg .LBB17_2
; CHECK-X86-NEXT: # %bb.1: # %yes
; CHECK-X86-NEXT: calll bar
define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm2
-; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm2, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-SLOW-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
-; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-FAST-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm4
-; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm4, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
+; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
-; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-SLOW-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-FAST-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm8
-; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm8, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm4
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm5
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm6
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm4
-; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm6
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm7
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT: vpackusdw %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
-; AVX1-NEXT: vpackusdw %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3
-; AVX1-NEXT: vpackusdw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm2, %ymm2
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm2, %ymm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i32_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubw {{.*}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm2
-; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm2, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-SLOW-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
-; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-FAST-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm4
-; SSE-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm4, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
+; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
-; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-SLOW-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-FAST-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
; SSE: # %bb.0:
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: movl $1, %eax
-; SSE-NEXT: movq %rax, %xmm8
-; SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
-; SSE-NEXT: psubq %xmm8, %xmm0
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm3
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm4
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm5
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm6
-; SSE-NEXT: psubq {{.*}}(%rip), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: movl $1, %eax
-; AVX1-NEXT: vmovq %rax, %xmm4
-; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm6
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm7
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [1.2598673968951787E-321,1.2598673968951787E-321]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT: vpackusdw %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm4, %xmm6, %xmm6
-; AVX1-NEXT: vpackusdw %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [1.2598673968951787E-321,1.2598673968951787E-321]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vandpd %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3
-; AVX1-NEXT: vpackusdw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vandpd %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-SLOW-NEXT: vpsubq {{.*}}(%rip), %ymm2, %ymm2
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm1, %ymm1
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm3, %ymm3
-; AVX2-FAST-NEXT: vpsubq {{.*}}(%rip), %ymm2, %ymm2
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm2
; AVX2-FAST-NEXT: vpermd %ymm3, %ymm4, %ymm3
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-FAST-NEXT: vpand %xmm5, %xmm0, %xmm0
; AVX2-FAST-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm1, %ymm1
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i32_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm1
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm2
-; SSE-NEXT: psubd {{.*}}(%rip), %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsubd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
; SSE: # %bb.0:
-; SSE-NEXT: psubw {{.*}}(%rip), %xmm0
-; SSE-NEXT: psubw {{.*}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psubb {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: movl %edi, %edx
-; CHECK-NEXT: andl $7, %edx
+; CHECK-NEXT: andb $7, %dl
; CHECK-NEXT: cmpb %cl, %dl
; CHECK-NEXT: jge .LBB0_2
; CHECK-NEXT: .LBB0_3: