Known = Known2.abs();
break;
}
+ case ISD::USUBSAT: {
+ // The result of usubsat will never be larger than the LHS, so every leading bit that is known zero in operand 0 is also known zero in the result.
+ Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // Only operand 0 (the LHS) is queried; operand 1 is not examined here.
+ Known.Zero.setHighBits(Known2.countMinLeadingZeros()); // Mark that many high bits of the result as known zero; no other bits are set by this case.
+ break;
+ }
case ISD::UMIN: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v4, s4, v2
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_and_b32_e32 v5, s4, v3
+; GFX6-NEXT: v_and_b32_e32 v4, s4, v3
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_max_u32_e32 v1, v1, v5
-; GFX6-NEXT: v_max_u32_e32 v0, v0, v4
+; GFX6-NEXT: v_max_u32_e32 v1, v1, v4
+; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_max_u32_e32 v0, v0, v2
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v6, s4, v3
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_and_b32_e32 v7, s4, v4
+; GFX6-NEXT: v_and_b32_e32 v6, s4, v4
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_max_u32_e32 v1, v1, v7
-; GFX6-NEXT: v_max_u32_e32 v0, v0, v6
+; GFX6-NEXT: v_max_u32_e32 v1, v1, v6
+; GFX6-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_max_u32_e32 v0, v0, v3
+; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_and_b32_e32 v5, s4, v5
; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
-; GFX6-NEXT: v_max_u32_e32 v2, v2, v5
-; GFX6-NEXT: v_sub_i32_e32 v3, vcc, v2, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
-; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
+; GFX6-NEXT: v_max_u32_e32 v1, v2, v5
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v1, v5
+; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, 0xffff
-; GFX6-NEXT: v_and_b32_e32 v10, s4, v4
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_and_b32_e32 v11, s4, v5
+; GFX6-NEXT: v_and_b32_e32 v9, s4, v5
; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_max_u32_e32 v1, v1, v11
-; GFX6-NEXT: v_max_u32_e32 v0, v0, v10
+; GFX6-NEXT: v_max_u32_e32 v1, v1, v9
+; GFX6-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_max_u32_e32 v0, v0, v4
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
-; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
-; GFX6-NEXT: v_and_b32_e32 v8, s4, v6
-; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
-; GFX6-NEXT: v_and_b32_e32 v9, s4, v7
+; GFX6-NEXT: v_and_b32_e32 v8, s4, v7
; GFX6-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX6-NEXT: v_and_b32_e32 v6, s4, v6
+; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_max_u32_e32 v2, v2, v8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT: v_max_u32_e32 v1, v3, v9
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
-; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT: v_max_u32_e32 v1, v2, v6
+; GFX6-NEXT: v_max_u32_e32 v2, v3, v8
+; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v7
+; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v4i16:
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB2_2:
-; CHECK-T1-NEXT: uxth r0, r0
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func16:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: uxth r0, r0
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y)
ret i16 %tmp
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB3_2:
-; CHECK-T1-NEXT: uxtb r0, r0
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func8:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: uxtb r0, r0
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y)
ret i8 %tmp
define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
; CHECK-T1-LABEL: func3:
; CHECK-T1: @ %bb.0:
-; CHECK-T1-NEXT: subs r1, r0, r1
+; CHECK-T1-NEXT: subs r0, r0, r1
; CHECK-T1-NEXT: bhs .LBB4_2
; CHECK-T1-NEXT: @ %bb.1:
-; CHECK-T1-NEXT: movs r1, #0
+; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB4_2:
-; CHECK-T1-NEXT: movs r0, #15
-; CHECK-T1-NEXT: ands r0, r1
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func3:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: and r0, r0, #15
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func3:
; CHECK-ARM: @ %bb.0:
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: and r0, r0, #15
; CHECK-ARM-NEXT: bx lr
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y)
ret i4 %tmp
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB2_2:
-; CHECK-T1-NEXT: uxth r0, r0
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func16:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: uxth r0, r0
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func16:
; CHECK-ARM-NEXT: uxth r1, r1
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: uxth r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i16 %y, %z
%tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB3_2:
-; CHECK-T1-NEXT: uxtb r0, r0
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func8:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: uxtb r0, r0
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func8:
; CHECK-ARM-NEXT: uxtb r1, r1
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: uxtb r0, r0
; CHECK-ARM-NEXT: bx lr
%a = mul i8 %y, %z
%tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: muls r1, r2, r1
; CHECK-T1-NEXT: movs r2, #15
-; CHECK-T1-NEXT: ands r1, r2
-; CHECK-T1-NEXT: subs r0, r0, r1
+; CHECK-T1-NEXT: ands r2, r1
+; CHECK-T1-NEXT: subs r0, r0, r2
; CHECK-T1-NEXT: bhs .LBB4_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: movs r0, #0
; CHECK-T1-NEXT: .LBB4_2:
-; CHECK-T1-NEXT: ands r0, r2
; CHECK-T1-NEXT: bx lr
;
; CHECK-T2-LABEL: func4:
; CHECK-T2-NEXT: subs r0, r0, r1
; CHECK-T2-NEXT: it lo
; CHECK-T2-NEXT: movlo r0, #0
-; CHECK-T2-NEXT: and r0, r0, #15
; CHECK-T2-NEXT: bx lr
;
; CHECK-ARM-LABEL: func4:
; CHECK-ARM-NEXT: and r1, r1, #15
; CHECK-ARM-NEXT: subs r0, r0, r1
; CHECK-ARM-NEXT: movlo r0, #0
-; CHECK-ARM-NEXT: and r0, r0, #15
; CHECK-ARM-NEXT: bx lr
%a = mul i4 %y, %z
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)
define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind {
; RV32I-LABEL: func16:
; RV32I: # %bb.0:
-; RV32I-NEXT: sub a2, a0, a1
-; RV32I-NEXT: mv a1, zero
-; RV32I-NEXT: bltu a0, a2, .LBB2_2
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: bltu a2, a1, .LBB2_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a1, a2
+; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB2_2:
-; RV32I-NEXT: lui a0, 16
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: func16:
; RV64I: # %bb.0:
-; RV64I-NEXT: sub a2, a0, a1
-; RV64I-NEXT: mv a1, zero
-; RV64I-NEXT: bltu a0, a2, .LBB2_2
+; RV64I-NEXT: mv a2, a0
+; RV64I-NEXT: sub a1, a0, a1
+; RV64I-NEXT: mv a0, zero
+; RV64I-NEXT: bltu a2, a1, .LBB2_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a1, a2
+; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB2_2:
-; RV64I-NEXT: lui a0, 16
-; RV64I-NEXT: addiw a0, a0, -1
-; RV64I-NEXT: and a0, a1, a0
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func16:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
-; RV32IZbb-NEXT: zext.h a0, a0
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func16:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
-; RV64IZbb-NEXT: zext.h a0, a0
; RV64IZbb-NEXT: ret
%tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %y);
ret i16 %tmp;
define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind {
; RV32I-LABEL: func8:
; RV32I: # %bb.0:
+; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: mv a2, zero
-; RV32I-NEXT: bltu a0, a1, .LBB3_2
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: bltu a2, a1, .LBB3_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB3_2:
-; RV32I-NEXT: andi a0, a2, 255
; RV32I-NEXT: ret
;
; RV64I-LABEL: func8:
; RV64I: # %bb.0:
+; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: mv a2, zero
-; RV64I-NEXT: bltu a0, a1, .LBB3_2
+; RV64I-NEXT: mv a0, zero
+; RV64I-NEXT: bltu a2, a1, .LBB3_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a2, a1
+; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB3_2:
-; RV64I-NEXT: andi a0, a2, 255
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func8:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
-; RV32IZbb-NEXT: andi a0, a0, 255
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func8:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
-; RV64IZbb-NEXT: andi a0, a0, 255
; RV64IZbb-NEXT: ret
%tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %y);
ret i8 %tmp;
define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind {
; RV32I-LABEL: func3:
; RV32I: # %bb.0:
+; RV32I-NEXT: mv a2, a0
; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: mv a2, zero
-; RV32I-NEXT: bltu a0, a1, .LBB4_2
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: bltu a2, a1, .LBB4_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB4_2:
-; RV32I-NEXT: andi a0, a2, 15
; RV32I-NEXT: ret
;
; RV64I-LABEL: func3:
; RV64I: # %bb.0:
+; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: sub a1, a0, a1
-; RV64I-NEXT: mv a2, zero
-; RV64I-NEXT: bltu a0, a1, .LBB4_2
+; RV64I-NEXT: mv a0, zero
+; RV64I-NEXT: bltu a2, a1, .LBB4_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: mv a2, a1
+; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: .LBB4_2:
-; RV64I-NEXT: andi a0, a2, 15
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func3:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
-; RV32IZbb-NEXT: andi a0, a0, 15
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func3:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
-; RV64IZbb-NEXT: andi a0, a0, 15
; RV64IZbb-NEXT: ret
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
ret i4 %tmp;
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT: psubusw %xmm1, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: psubusw %xmm1, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: combine_trunc_v8i16_v8i8:
; SSE42: # %bb.0:
; SSE42-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE42-NEXT: psubusw %xmm1, %xmm0
-; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE42-NEXT: packuswb %xmm0, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: combine_trunc_v8i16_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = zext <8 x i8> %a0 to <8 x i16>
%2 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %1, <8 x i16> %a1)
; X86-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: andl $15, %eax
+; X86-NEXT: movzbl %al, %eax
; X86-NEXT: retl
;
; X64-LABEL: func3:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subb %sil, %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: andl $15, %eax
+; X64-NEXT: movzbl %dil, %ecx
+; X64-NEXT: cmovbl %eax, %ecx
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y)
ret i4 %tmp
; X86-NEXT: subb %al, %cl
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: cmovbl %edx, %eax
-; X86-NEXT: andl $15, %eax
+; X86-NEXT: movzbl %al, %eax
; X86-NEXT: retl
;
; X64-LABEL: func4:
; X64-NEXT: subb %al, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: andl $15, %eax
+; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
%a = mul i4 %y, %z
%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)