if ((ShAmt < DemandedBits.getActiveBits()) &&
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
+ } else {
+ // This is a variable shift, so we can't shift the demand mask by a known
+ // amount. But if we are not demanding high bits, then we are not
+ // demanding those bits from the pre-shifted operand either.
+ if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
+ APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
+ if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
+ Depth + 1)) {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op->setFlags(Flags);
+ }
+ return true;
+ }
+ Known.resetAll();
+ }
}
// If we are only demanding sign bits then we can use the shift source
; EG-NEXT: TEX 0 @8
; EG-NEXT: ALU 0, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @10
-; EG-NEXT: ALU 12, @16, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 11, @16, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T7.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 15:
; EG-NEXT: MOV * T7.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 16:
-; EG-NEXT: AND_INT T0.Y, T0.X, literal.x,
-; EG-NEXT: AND_INT T0.Z, T7.X, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: AND_INT T0.Z, T0.X, literal.x,
; EG-NEXT: LSHR T0.W, T0.X, literal.y,
; EG-NEXT: LSHR * T1.W, T7.X, literal.y,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT: LSHL T0.W, PS, PV.W,
-; EG-NEXT: LSHL * T1.W, PV.Z, PV.Y,
+; EG-NEXT: LSHL * T1.W, T7.X, PV.Z,
; EG-NEXT: AND_INT T1.W, PS, literal.x,
; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44)
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
-; EG-NEXT: ALU 53, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: MOV T3.X, T10.W,
; EG-NEXT: MOV * T0.Z, T6.X,
; EG-NEXT: MOV * T1.Y, T2.X,
-; EG-NEXT: AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
+; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL * T1.W, PS, PV.W,
+; EG-NEXT: LSHL * T1.W, T0.X, PV.W,
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
; EG-NEXT: AND_INT * T2.W, T0.Z, literal.y,
; EG-NEXT: 65535(9.183409e-41), -65536(nan)
; EG-NEXT: OR_INT * T1.W, T2.W, PV.W,
; EG-NEXT: MOV T6.X, PV.W,
; EG-NEXT: MOV * T0.X, T7.X,
-; EG-NEXT: AND_INT T1.W, T0.Z, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
-; EG-NEXT: LSHL T1.W, PS, PV.W,
+; EG-NEXT: LSHL T1.W, T0.Y, PV.W,
; EG-NEXT: AND_INT * T2.W, T0.X, literal.x,
; EG-NEXT: -65536(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT * T1.W, PV.W, literal.x,
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_and_b32 s6, s4, 0xffff
-; VI-NEXT: s_lshr_b32 s4, s4, 16
+; VI-NEXT: s_lshr_b32 s6, s4, 16
; VI-NEXT: s_lshr_b32 s7, s5, 16
-; VI-NEXT: s_lshl_b32 s4, s4, s7
-; VI-NEXT: s_lshl_b32 s5, s6, s5
-; VI-NEXT: s_lshl_b32 s4, s4, 16
-; VI-NEXT: s_and_b32 s5, s5, 0xffff
-; VI-NEXT: s_or_b32 s4, s5, s4
+; VI-NEXT: s_lshl_b32 s6, s6, s7
+; VI-NEXT: s_lshl_b32 s4, s4, s5
+; VI-NEXT: s_lshl_b32 s5, s6, 16
+; VI-NEXT: s_and_b32 s4, s4, 0xffff
+; VI-NEXT: s_or_b32 s4, s4, s5
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
define void @sll_ext(i16 %a, i32 signext %b, i16* %p) nounwind {
; RV32I-LABEL: sll_ext:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
define void @sll_ext_drop_poison(i16 %a, i32 signext %b, i16* %p) nounwind {
; RV32I-LABEL: sll_ext_drop_poison:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext_drop_poison:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
define void @sll_ext(i8 %a, i32 signext %b, i8* %p) nounwind {
; RV32I-LABEL: sll_ext:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 255
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
define void @sll_ext_drop_poison(i8 %a, i32 signext %b, i8* %p) nounwind {
; RV32I-LABEL: sll_ext_drop_poison:
; RV32I: # %bb.0:
-; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sll_ext_drop_poison:
; RV64I: # %bb.0:
-; RV64I-NEXT: andi a0, a0, 255
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
define void @sext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
; RV32I-LABEL: sext_shl_trunc_same_size:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_shl_trunc_same_size:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
define void @zext_shl_trunc_same_size(i16 %x, i32 %y, i16* %res) {
; RV32I-LABEL: zext_shl_trunc_same_size:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sh a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_trunc_same_size:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sh a0, 0(a2)
; RV64I-NEXT: ret
define void @sext_shl_trunc_smaller(i16 %x, i32 %y, i8* %res) {
; RV32I-LABEL: sext_shl_trunc_smaller:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: sext_shl_trunc_smaller:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
define void @zext_shl_trunc_smaller(i16 %x, i32 %y, i8* %res) {
; RV32I-LABEL: zext_shl_trunc_smaller:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_trunc_smaller:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: ret
define i32 @sext_shl_mask(i16 %x, i32 %y) {
; RV32I-LABEL: sext_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 16
-; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
;
; RV64I-LABEL: sext_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: slli a0, a0, 48
-; RV64I-NEXT: srai a0, a0, 48
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: slli a0, a0, 48
; RV64I-NEXT: srli a0, a0, 48
define i32 @zext_shl_mask(i16 %x, i32 %y) {
; RV32I-LABEL: zext_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: zext_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -1
-; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: srli a0, a0, 48
; RV64I-NEXT: ret
%conv = zext i16 %x to i32
%shl = shl i32 %conv, %y
define i32 @set_shl_mask(i32 %x, i32 %y) {
; RV32I-LABEL: set_shl_mask:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a2, 48
-; RV32I-NEXT: addi a2, a2, 1
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: lui a2, 16
+; RV32I-NEXT: addi a3, a2, 1
+; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: lui a1, 16
-; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: set_shl_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a2, 48
-; RV64I-NEXT: addiw a2, a2, 1
-; RV64I-NEXT: or a0, a0, a2
+; RV64I-NEXT: lui a2, 16
+; RV64I-NEXT: addiw a3, a2, 1
+; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: lui a1, 16
-; RV64I-NEXT: and a0, a0, a1
+; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: ret
%z = or i32 %x, 196609
%s = shl i32 %z, %y
; CHECK-NEXT: add r7, sp, #12
; CHECK-NEXT: .save {r8, r9, r10, r11}
; CHECK-NEXT: push.w {r8, r9, r10, r11}
-; CHECK-NEXT: .pad #12
-; CHECK-NEXT: sub sp, #12
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: wls lr, r1, .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: adds r2, r3, #4
-; CHECK-NEXT: add.w r9, r0, #4
-; CHECK-NEXT: mvn r11, #1
-; CHECK-NEXT: @ implicit-def: $r6
-; CHECK-NEXT: @ implicit-def: $r12
-; CHECK-NEXT: str r4, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: add.w r10, r3, #4
+; CHECK-NEXT: adds r0, #4
+; CHECK-NEXT: mvn r9, #1
+; CHECK-NEXT: @ implicit-def: $r8
+; CHECK-NEXT: @ implicit-def: $r4
+; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: .LBB2_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r1, [r9, #-4]
-; CHECK-NEXT: ldr.w r10, [r2]
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: muls r1, r3, r1
-; CHECK-NEXT: adds.w r8, r1, #-2147483648
-; CHECK-NEXT: asr.w r5, r1, #31
-; CHECK-NEXT: adc r1, r5, #0
-; CHECK-NEXT: mul r5, r10, r0
-; CHECK-NEXT: mov r0, r2
-; CHECK-NEXT: ldr.w r2, [r11, #4]
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: add.w r5, r5, #-2147483648
-; CHECK-NEXT: asrl r8, r1, r5
-; CHECK-NEXT: smull r4, r5, r10, r8
-; CHECK-NEXT: lsll r4, r5, #30
-; CHECK-NEXT: asrs r1, r5, #31
-; CHECK-NEXT: mov r4, r5
-; CHECK-NEXT: lsll r4, r1, r10
-; CHECK-NEXT: lsll r4, r1, #30
-; CHECK-NEXT: ldr.w r4, [r11]
-; CHECK-NEXT: asrs r5, r1, #31
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: muls r4, r6, r4
-; CHECK-NEXT: adds r4, #2
-; CHECK-NEXT: lsll r8, r5, r4
-; CHECK-NEXT: ldr r4, [r9], #4
-; CHECK-NEXT: asr.w r5, r12, #31
-; CHECK-NEXT: add.w r8, r8, #-2147483648
+; CHECK-NEXT: ldr r2, [r0]
+; CHECK-NEXT: asrs r5, r4, #31
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: muls r2, r3, r2
+; CHECK-NEXT: adds r4, r4, r2
+; CHECK-NEXT: adc.w r2, r5, r2, asr #31
+; CHECK-NEXT: ldr.w r5, [r9, #4]
+; CHECK-NEXT: adds.w r4, r4, #-2147483648
+; CHECK-NEXT: adc r1, r2, #0
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: smull r5, r6, r5, r8
+; CHECK-NEXT: ldr.w r2, [r9]
+; CHECK-NEXT: asrs r4, r1, #31
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: subs r5, r1, r5
+; CHECK-NEXT: sbcs r4, r6
+; CHECK-NEXT: adds.w r6, r5, #-2147483648
+; CHECK-NEXT: adc r5, r4, #0
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: muls r4, r3, r4
; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: adds.w r1, r12, r4
-; CHECK-NEXT: adc.w r5, r5, r4, asr #31
-; CHECK-NEXT: smull r6, r4, r2, r6
-; CHECK-NEXT: adds.w r1, r1, #-2147483648
-; CHECK-NEXT: adc r1, r5, #0
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: asrs r5, r1, #31
-; CHECK-NEXT: subs r6, r1, r6
-; CHECK-NEXT: sbcs r5, r4
-; CHECK-NEXT: adds.w r6, r6, #-2147483648
-; CHECK-NEXT: adc r5, r5, #0
-; CHECK-NEXT: asrl r6, r5, r8
+; CHECK-NEXT: adds.w r0, r4, #-2147483648
+; CHECK-NEXT: asr.w r1, r4, #31
+; CHECK-NEXT: ldr.w r4, [r10]
+; CHECK-NEXT: adc r1, r1, #0
+; CHECK-NEXT: mul r2, r4, r12
+; CHECK-NEXT: add.w r12, r12, #4
+; CHECK-NEXT: add.w r2, r2, #-2147483648
+; CHECK-NEXT: asrl r0, r1, r2
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: smull r0, r1, r4, r0
+; CHECK-NEXT: lsll r0, r1, #30
+; CHECK-NEXT: asr.w r11, r1, #31
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: lsll r0, r11, r4
+; CHECK-NEXT: lsrl r0, r11, #2
+; CHECK-NEXT: mul r1, r1, r8
+; CHECK-NEXT: adds r1, #2
+; CHECK-NEXT: lsll r0, r11, r1
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: add.w r0, r0, #-2147483648
+; CHECK-NEXT: asrl r6, r5, r0
+; CHECK-NEXT: movs r0, #2
; CHECK-NEXT: lsrl r6, r5, #2
-; CHECK-NEXT: movs r5, #2
-; CHECK-NEXT: str r6, [r5]
-; CHECK-NEXT: ldr r5, [r11], #-4
-; CHECK-NEXT: mls r1, r5, r10, r1
-; CHECK-NEXT: adds.w r12, r1, #-2147483648
-; CHECK-NEXT: asr.w r4, r1, #31
-; CHECK-NEXT: adc r1, r4, #0
-; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT: lsrl r12, r1, #2
-; CHECK-NEXT: rsb.w r1, r12, #0
+; CHECK-NEXT: str r6, [r0]
+; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: ldr r0, [r9], #-4
+; CHECK-NEXT: mls r0, r0, r4, r1
+; CHECK-NEXT: adds.w r4, r0, #-2147483648
+; CHECK-NEXT: asr.w r1, r0, #31
+; CHECK-NEXT: adc r1, r1, #0
+; CHECK-NEXT: lsrl r4, r1, #2
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: str r0, [r2]
+; CHECK-NEXT: str r0, [r10, #-4]
+; CHECK-NEXT: add.w r10, r10, #4
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: adds r0, #4
-; CHECK-NEXT: str r1, [r4]
-; CHECK-NEXT: str r1, [r2, #-4]
-; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: .LBB2_3: @ %while.end
-; CHECK-NEXT: add sp, #12
+; CHECK-NEXT: add sp, #16
; CHECK-NEXT: pop.w {r8, r9, r10, r11}
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry: