}
}
- // Reduce bit extract of low half of an integer to the narrower type.
- // (and (srl i64:x, K), KMask) ->
- // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- unsigned Size = VT.getSizeInBits();
- const APInt &AndMask = CAnd->getAPIntValue();
- unsigned ShiftBits = CShift->getZExtValue();
-
- // Bail out, this node will probably disappear anyway.
- if (ShiftBits == 0)
- return SDValue();
-
- unsigned MaskBits = AndMask.countr_one();
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
-
- if (AndMask.isMask() &&
- // Required bits must not span the two halves of the integer and
- // must fit in the half size type.
- (ShiftBits + MaskBits <= Size / 2) &&
- TLI.isNarrowingProfitable(VT, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
- TLI.isTruncateFree(VT, HalfVT) &&
- TLI.isZExtFree(HalfVT, VT)) {
- // The isNarrowingProfitable is to avoid regressions on PPC and
- // AArch64 which match a few 64-bit bit insert / bit extract patterns
- // on downstream users of this. Those patterns could probably be
- // extended to handle extensions mixed in.
-
- SDLoc SL(N0);
- assert(MaskBits <= Size);
-
- // Extracting the highest bit of the low half.
- EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
- N0.getOperand(0));
-
- SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
- SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
- SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
- SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
- return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
- }
- }
- }
- }
-
return SDValue();
}
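
Note: a minimal IR-level sketch of the pattern the removed combine matched (hypothetical function, not one of the tests below); the i64 srl+and was rewritten into i32 operations whenever the extracted field lay entirely within the low half:

define i64 @extract_low_field(i64 %x) {
  %srl = lshr i64 %x, 8    ; K = 8
  %and = and i64 %srl, 255 ; KMask = 0xff; ShiftBits + MaskBits = 16 <= 32
  ret i64 %and
}
; -> (zext (and (srl (trunc %x to i32), 8), 255) to i64)
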
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
+ if ((BitWidth % 2) == 0 && !VT.isVector() &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT))) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift =
+ TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
+ }
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
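
Note: the SimplifyDemandedBits replacement keys off demanded bits rather than an explicit mask, so it also fires when no `and` is present, a case the removed combine could not catch. A minimal sketch (hypothetical function; assumes a target where isNarrowingProfitable(i64, i32) holds, as on the AMDGPU and x86-64 targets updated below):

define i16 @narrow_srl_demanded(i64 %x) {
  %srl = lshr i64 %x, 8      ; only result bits 0..15 are demanded below,
  %t = trunc i64 %srl to i16 ; so InDemandedMask covers operand bits 8..23
  ret i16 %t                 ; and the srl can be performed in i32
}

The second trigger, MaskedValueIsZero on the high half, is what the 0xA000000 known-bits test below exercises.
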
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_bfe_i32 v3, v2, 16, 8
; GFX7-NEXT: v_bfe_i32 v4, v2, 0, 8
+; GFX7-NEXT: v_bfe_i32 v3, v2, 16, 8
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_bfe_i32 v7, v0, 0, 8
; GFX7-NEXT: v_ashrrev_i32_e32 v5, 24, v2
; GFX7-NEXT: v_bfe_i32 v2, v2, 8, 8
-; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_bfe_i32 v6, v0, 16, 8
-; GFX7-NEXT: v_bfe_i32 v7, v0, 0, 8
; GFX7-NEXT: v_ashrrev_i32_e32 v8, 24, v0
; GFX7-NEXT: v_bfe_i32 v0, v0, 8, 8
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX7-NEXT: v_alignbit_b32 v2, 0, v2, 16
-; GFX7-NEXT: v_alignbit_b32 v0, 0, v0, 16
+; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v4, v7, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_and_b32_e32 v3, 0xff00, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v2
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_and_b32_e32 v6, 0xff00, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2
; GFX7-NEXT: v_and_b32_e32 v5, 0xff, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0
-; GFX7-NEXT: v_alignbit_b32 v2, v4, v2, 16
-; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX7-NEXT: v_alignbit_b32 v0, v7, v0, 16
-; GFX7-NEXT: v_alignbit_b32 v3, 0, v3, 16
-; GFX7-NEXT: v_alignbit_b32 v6, 0, v6, 16
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v0
+; GFX7-NEXT: v_bfe_u32 v4, v2, 8, 8
+; GFX7-NEXT: v_alignbit_b32 v2, v3, v2, 16
+; GFX7-NEXT: v_bfe_u32 v3, v0, 8, 8
+; GFX7-NEXT: v_alignbit_b32 v0, v6, v0, 16
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v1, v5, v4, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX7-NEXT: v_mad_u32_u24 v1, v5, v7, v1
+; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2
-; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX7-NEXT: v_mad_u32_u24 v1, v3, v6, v1
+; GFX7-NEXT: v_mad_u32_u24 v1, v4, v3, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
-; GFX7-NEXT: v_mad_u32_u24 v0, v7, v8, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v6, v5, v0
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0
; GFX7-NEXT: s_addc_u32 s13, s13, 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_bfe_i32 v8, v2, 0, 4
-; GFX7-NEXT: v_bfe_i32 v7, v2, 4, 4
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_bfe_i32 v15, v0, 0, 4
-; GFX7-NEXT: v_and_b32_e32 v8, 0xffff, v8
-; GFX7-NEXT: v_bfe_i32 v14, v0, 4, 4
-; GFX7-NEXT: v_and_b32_e32 v15, 0xffff, v15
-; GFX7-NEXT: v_bfe_i32 v6, v2, 8, 4
-; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX7-NEXT: v_bfe_i32 v13, v0, 8, 4
-; GFX7-NEXT: v_and_b32_e32 v14, 0xffff, v14
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1
+; GFX7-NEXT: v_bfe_i32 v6, v2, 0, 4
; GFX7-NEXT: v_bfe_i32 v3, v2, 24, 4
-; GFX7-NEXT: v_bfe_i32 v4, v2, 20, 4
-; GFX7-NEXT: v_bfe_i32 v5, v2, 16, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v9, 28, v2
-; GFX7-NEXT: v_bfe_i32 v2, v2, 12, 4
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_bfe_i32 v13, v0, 0, 4
+; GFX7-NEXT: v_bfe_i32 v4, v2, 16, 4
+; GFX7-NEXT: v_bfe_i32 v5, v2, 8, 4
+; GFX7-NEXT: v_ashrrev_i32_e32 v7, 28, v2
+; GFX7-NEXT: v_bfe_i32 v8, v2, 20, 4
+; GFX7-NEXT: v_bfe_i32 v9, v2, 12, 4
+; GFX7-NEXT: v_bfe_i32 v2, v2, 4, 4
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-NEXT: v_bfe_i32 v10, v0, 24, 4
-; GFX7-NEXT: v_bfe_i32 v11, v0, 20, 4
-; GFX7-NEXT: v_bfe_i32 v12, v0, 16, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v16, 28, v0
-; GFX7-NEXT: v_bfe_i32 v0, v0, 12, 4
+; GFX7-NEXT: v_bfe_i32 v11, v0, 16, 4
+; GFX7-NEXT: v_bfe_i32 v12, v0, 8, 4
+; GFX7-NEXT: v_ashrrev_i32_e32 v14, 28, v0
+; GFX7-NEXT: v_bfe_i32 v15, v0, 20, 4
+; GFX7-NEXT: v_bfe_i32 v16, v0, 12, 4
+; GFX7-NEXT: v_bfe_i32 v0, v0, 4, 4
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff, v13
-; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v6, v13, v1
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff, v12
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
+; GFX7-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX7-NEXT: v_and_b32_e32 v16, 0xffff, v16
+; GFX7-NEXT: v_mad_u32_u24 v0, v5, v12, v0
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff, v11
-; GFX7-NEXT: v_mad_u32_u24 v0, v5, v12, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0
+; GFX7-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX7-NEXT: v_and_b32_e32 v15, 0xffff, v15
+; GFX7-NEXT: v_mad_u32_u24 v0, v4, v11, v0
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff, v10
-; GFX7-NEXT: v_mad_u32_u24 v0, v4, v11, v0
-; GFX7-NEXT: v_and_b32_e32 v9, 0xffff, v9
-; GFX7-NEXT: v_and_b32_e32 v16, 0xffff, v16
+; GFX7-NEXT: v_mad_u32_u24 v0, v8, v15, v0
+; GFX7-NEXT: v_and_b32_e32 v7, 0xffff, v7
+; GFX7-NEXT: v_and_b32_e32 v14, 0xffff, v14
; GFX7-NEXT: v_mad_u32_u24 v0, v3, v10, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v7, v14, v0
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1
; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v6
-; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9
; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v13
-; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v16
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
-; GFX7-NEXT: v_alignbit_b32 v9, 0, v9, 24
-; GFX7-NEXT: v_alignbit_b32 v16, 0, v16, 24
+; GFX7-NEXT: v_and_b32_e32 v9, 0xff, v9
+; GFX7-NEXT: v_and_b32_e32 v16, 0xff, v16
; GFX7-NEXT: v_mad_u32_u24 v0, v6, v13, v0
; GFX7-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX7-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX7-NEXT: buffer_load_ubyte v1, off, s[0:3], 0
; GFX7-NEXT: s_addc_u32 s13, s13, 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_and_b32_e32 v9, 15, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 28, v2
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_and_b32_e32 v16, 15, v0
; GFX7-NEXT: v_bfe_u32 v4, v2, 24, 4
; GFX7-NEXT: v_bfe_u32 v5, v2, 20, 4
; GFX7-NEXT: v_bfe_u32 v6, v2, 16, 4
-; GFX7-NEXT: v_bfe_u32 v7, v2, 8, 4
-; GFX7-NEXT: v_bfe_u32 v8, v2, 4, 4
-; GFX7-NEXT: v_lshlrev_b32_e32 v2, 12, v2
+; GFX7-NEXT: v_bfe_u32 v7, v2, 12, 4
+; GFX7-NEXT: v_bfe_u32 v8, v2, 8, 4
+; GFX7-NEXT: v_bfe_u32 v9, v2, 4, 4
+; GFX7-NEXT: v_and_b32_e32 v2, 15, v2
+; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_lshrrev_b32_e32 v10, 28, v0
; GFX7-NEXT: v_bfe_u32 v11, v0, 24, 4
; GFX7-NEXT: v_bfe_u32 v12, v0, 20, 4
; GFX7-NEXT: v_bfe_u32 v13, v0, 16, 4
-; GFX7-NEXT: v_bfe_u32 v14, v0, 8, 4
-; GFX7-NEXT: v_bfe_u32 v15, v0, 4, 4
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 12, v0
+; GFX7-NEXT: v_bfe_u32 v14, v0, 12, 4
+; GFX7-NEXT: v_bfe_u32 v15, v0, 8, 4
+; GFX7-NEXT: v_bfe_u32 v16, v0, 4, 4
+; GFX7-NEXT: v_and_b32_e32 v0, 15, v0
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v1, v9, v16, v1
-; GFX7-NEXT: v_and_b32_e32 v2, 0xf000000, v2
-; GFX7-NEXT: v_and_b32_e32 v0, 0xf000000, v0
-; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1
-; GFX7-NEXT: v_alignbit_b32 v2, s10, v2, 24
-; GFX7-NEXT: v_alignbit_b32 v0, 0, v0, 24
-; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1
; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
+; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v8, v15, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v7, v14, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v6, v13, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v5, v12, v0
; GFX7-NEXT: v_mad_u32_u24 v0, v4, v11, v0
; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v7, 0, v7, 16
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
+; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v7, v4, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v7, v4
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v3, v0
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v2
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-SDAG-NEXT: v_alignbit_b32 v1, 0, v1, 16
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v6, v5, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v0, v3
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
+; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6
; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
+; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v5, 1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v3, 0, v8, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
+; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7
-; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v7
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_alignbit_b32 v4, 0, v0, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v3
+; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3
+; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2
; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v10, v7, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v2
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v9, 0, v9, 16
+; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v10, v7
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v8, v6, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v8, v6, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v9, v5, v1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v8, 16, v13
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v9, v5
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v11, v4, v0
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v8, v7
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v11, v4, 1
-; GFX67-SDAG-NEXT: v_alignbit_b32 v1, 0, v1, 16
+; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v4, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v10, 16, v12
+; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v0, v4
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
-; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v10, v8
+; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6
; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v4, 0, v8, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2
; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7
-; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
-; GFX67-SDAG-NEXT: v_alignbit_b32 v5, 0, v0, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v7, 0, v7, 16
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
+; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v7, v4, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v7, v4
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v3, v0
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v3, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v2
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-SDAG-NEXT: v_alignbit_b32 v1, 0, v1, 16
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v6, v5, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v0, v3
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
-; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
+; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6
; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
+; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v5, 1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v3, 0, v8, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
+; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7
-; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8
+; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v7
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_alignbit_b32 v4, 0, v0, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG: ; %bb.0: ; %entry
; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v3
+; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3
+; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2
; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v10, v7, v3
-; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v2
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
-; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v9, 0, v9, 16
+; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1
+; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v10, v7
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v8, v6, 1
-; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v0
-; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v8, v6, v2
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v9, v5, v1
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v8, 16, v13
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v9, v5
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v11, v4, v0
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v8, v7
-; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v11, v4, 1
-; GFX67-SDAG-NEXT: v_alignbit_b32 v1, 0, v1, 16
+; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13
+; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v4, 1
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
-; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v10, 16, v12
+; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v0, v4
+; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
-; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v10, v8
+; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6
; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
-; GFX67-SDAG-NEXT: v_alignbit_b32 v4, 0, v8, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2
; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7
-; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10
; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5
; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
-; GFX67-SDAG-NEXT: v_alignbit_b32 v5, 0, v0, 16
+; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 0xa000000, v0
-; GCN-NEXT: v_alignbit_b32 v0, 0, v0, 25
+; GCN-NEXT: v_lshrrev_b32_e32 v0, 25, v0
; GCN-NEXT: v_add_u32_e32 v0, 55, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
; NOSDWA-NEXT: v_mov_b32_e32 v2, s2
; NOSDWA-NEXT: v_mov_b32_e32 v3, s3
; NOSDWA-NEXT: s_waitcnt vmcnt(0)
-; NOSDWA-NEXT: v_lshrrev_b64 v[4:5], 24, v[0:1]
-; NOSDWA-NEXT: v_and_b32_e32 v6, 0xff, v0
-; NOSDWA-NEXT: v_lshrrev_b32_e32 v7, 8, v0
+; NOSDWA-NEXT: v_and_b32_e32 v4, 0xff, v0
+; NOSDWA-NEXT: v_lshrrev_b32_e32 v5, 8, v0
+; NOSDWA-NEXT: v_lshrrev_b32_e32 v6, 24, v0
; NOSDWA-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; NOSDWA-NEXT: v_and_b32_e32 v5, 0xff, v1
+; NOSDWA-NEXT: v_and_b32_e32 v7, 0xff, v1
; NOSDWA-NEXT: v_lshrrev_b32_e32 v8, 8, v1
; NOSDWA-NEXT: v_lshrrev_b32_e32 v9, 24, v1
; NOSDWA-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; NOSDWA-NEXT: v_lshlrev_b16_e32 v7, 8, v7
+; NOSDWA-NEXT: v_lshlrev_b16_e32 v5, 8, v5
+; NOSDWA-NEXT: v_lshlrev_b16_e32 v6, 8, v6
; NOSDWA-NEXT: v_and_b32_e32 v0, 0xff, v0
; NOSDWA-NEXT: v_lshlrev_b16_e32 v8, 8, v8
; NOSDWA-NEXT: v_lshlrev_b16_e32 v9, 8, v9
; NOSDWA-NEXT: v_and_b32_e32 v1, 0xff, v1
-; NOSDWA-NEXT: v_lshlrev_b16_e32 v4, 8, v4
-; NOSDWA-NEXT: v_or_b32_e32 v6, v6, v7
-; NOSDWA-NEXT: v_or_b32_e32 v5, v5, v8
+; NOSDWA-NEXT: v_or_b32_e32 v4, v4, v5
+; NOSDWA-NEXT: v_or_b32_e32 v0, v0, v6
+; NOSDWA-NEXT: v_or_b32_e32 v5, v7, v8
; NOSDWA-NEXT: v_or_b32_e32 v1, v1, v9
-; NOSDWA-NEXT: v_or_b32_e32 v0, v0, v4
-; NOSDWA-NEXT: v_and_b32_e32 v6, 0xffff, v6
-; NOSDWA-NEXT: v_and_b32_e32 v4, 0xffff, v5
-; NOSDWA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; NOSDWA-NEXT: v_and_b32_e32 v4, 0xffff, v4
; NOSDWA-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; NOSDWA-NEXT: v_or_b32_e32 v0, v6, v0
-; NOSDWA-NEXT: v_or_b32_e32 v1, v4, v1
+; NOSDWA-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; NOSDWA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; NOSDWA-NEXT: v_or_b32_e32 v0, v4, v0
+; NOSDWA-NEXT: v_or_b32_e32 v1, v5, v1
; NOSDWA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; NOSDWA-NEXT: s_endpgm
;
; GFX89-LABEL: pulled_out_test:
; GFX89: ; %bb.0: ; %entry
; GFX89-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX89-NEXT: v_mov_b32_e32 v6, 8
-; GFX89-NEXT: v_mov_b32_e32 v7, 0xff
+; GFX89-NEXT: v_mov_b32_e32 v4, 8
+; GFX89-NEXT: v_mov_b32_e32 v5, 0xff
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: v_mov_b32_e32 v0, s0
; GFX89-NEXT: v_mov_b32_e32 v1, s1
; GFX89-NEXT: v_mov_b32_e32 v2, s2
; GFX89-NEXT: v_mov_b32_e32 v3, s3
; GFX89-NEXT: s_waitcnt vmcnt(0)
-; GFX89-NEXT: v_lshrrev_b64 v[4:5], 24, v[0:1]
-; GFX89-NEXT: v_lshrrev_b32_sdwa v8, v6, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX89-NEXT: v_lshrrev_b32_sdwa v6, v6, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX89-NEXT: v_lshrrev_b32_sdwa v6, v4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX89-NEXT: v_lshrrev_b32_e32 v7, 24, v0
+; GFX89-NEXT: v_lshrrev_b32_sdwa v4, v4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX89-NEXT: v_lshrrev_b32_e32 v9, 24, v1
-; GFX89-NEXT: v_and_b32_sdwa v5, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX89-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX89-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX89-NEXT: v_lshlrev_b16_e32 v6, 8, v9
-; GFX89-NEXT: v_lshlrev_b16_e32 v4, 8, v4
-; GFX89-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX89-NEXT: v_or_b32_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX89-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX89-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX89-NEXT: v_lshlrev_b16_e32 v6, 8, v7
+; GFX89-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX89-NEXT: v_lshlrev_b16_e32 v4, 8, v9
+; GFX89-NEXT: v_or_b32_sdwa v6, v8, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX89-NEXT: v_or_b32_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX89-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX89-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX89-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX89-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX89-NEXT: s_endpgm
;
; GFX9-LABEL: pulled_out_test:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 8
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX9-NEXT: s_movk_i32 s0, 0xff
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_lshrrev_b64 v[2:3], 24, v[0:1]
-; GFX9-NEXT: v_lshrrev_b32_sdwa v6, v5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_lshrrev_b32_sdwa v5, v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_lshrrev_b32_sdwa v4, v3, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v0
+; GFX9-NEXT: v_lshrrev_b32_sdwa v3, v3, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
-; GFX9-NEXT: v_and_b32_sdwa v3, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: v_and_b32_sdwa v6, v0, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_b32_sdwa v8, v1, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_lshlrev_b16_e32 v5, 8, v7
-; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2
-; GFX9-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v5, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[2:3]
+; GFX9-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-NEXT: v_lshlrev_b16_e32 v4, 8, v5
+; GFX9-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-NEXT: v_lshlrev_b16_e32 v3, 8, v7
+; GFX9-NEXT: v_or_b32_sdwa v4, v6, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_or_b32_sdwa v3, v8, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: pulled_out_test:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-NEXT: v_mov_b32_e32 v4, 0
-; GFX10-NEXT: v_mov_b32_e32 v5, 8
-; GFX10-NEXT: v_mov_b32_e32 v6, 0xff
-; GFX10-NEXT: v_mov_b32_e32 v7, 24
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: v_mov_b32_e32 v3, 8
+; GFX10-NEXT: v_mov_b32_e32 v4, 24
+; GFX10-NEXT: v_mov_b32_e32 v5, 0xff
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_lshrrev_b64 v[2:3], 24, v[0:1]
-; GFX10-NEXT: v_lshrrev_b32_sdwa v3, v5, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT: v_and_b32_sdwa v8, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX10-NEXT: v_lshrrev_b32_sdwa v5, v5, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT: v_lshrrev_b32_sdwa v7, v7, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT: v_and_b32_sdwa v6, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX10-NEXT: v_lshlrev_b16 v2, 8, v2
-; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX10-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; GFX10-NEXT: v_or_b32_sdwa v3, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT: v_or_b32_sdwa v2, v8, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_lshrrev_b32_sdwa v6, v3, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_lshrrev_b32_sdwa v7, v4, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_and_b32_sdwa v8, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT: v_lshrrev_b32_sdwa v3, v3, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v4, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX10-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX10-NEXT: v_or_b32_sdwa v6, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; GFX10-NEXT: v_or_b32_sdwa v3, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX10-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX10-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[2:3]
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX10-NEXT: s_endpgm
entry:
%idxprom = ashr exact i64 15, 32
; GCN-NEXT: s_mov_b64 s[4:5], 0x41
; GCN-NEXT: v_lshr_b64 v[1:2], s[4:5], v0
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0
-; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT: v_mov_b32_e32 v3, 0x41
-; GCN-NEXT: s_and_b64 vcc, s[4:5], vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, v3, v1, s[4:5]
-; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-NEXT: v_mov_b32_e32 v2, 0x41
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshr_b64 v[1:2], 33, v0
; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0
-; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
-; GCN-NEXT: s_and_b64 vcc, s[4:5], vcc
-; GCN-NEXT: v_cndmask_b32_e64 v0, 33, v1, s[4:5]
-; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, 33, v1, vcc
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX1032-NEXT: v_cmp_le_u32_e32 vcc_lo, s0, v0
; GFX1032-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1032-NEXT: v_alignbit_b32 v0, 0, vcc_lo, 1
-; GFX1032-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
; GFX1032-NEXT: s_min_u32 s0, s0, s1
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
; GFX1032-NEXT: v_trunc_f32_e32 v1, v1
; GFX1032-NEXT: v_fma_f32 v0, -v1, s0, v0
; GFX1032-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0
-; GFX1032-NEXT: v_alignbit_b32 v1, 0, vcc_lo, 1
+; GFX1032-NEXT: s_lshr_b32 s0, vcc_lo, 1
; GFX1032-NEXT: v_cmp_nlg_f32_e32 vcc_lo, 0, v0
-; GFX1032-NEXT: v_readfirstlane_b32 s0, v1
; GFX1032-NEXT: s_ff1_i32_b32 s0, s0
; GFX1032-NEXT: s_min_u32 s0, s0, s1
; GFX1032-NEXT: s_cmp_gt_u32 s0, 9
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: shrq $16, %rsi
-; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: shrq $24, %rax
+; CHECK-NEXT: shrq $16, %rax
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %r8b, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %edx, %edx
; CHECK-NEXT: movl (%rdi,%rdx,4), %edx
; CHECK-NEXT: movzbl %dl, %r10d
+; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT: shrl $8, %edx
; CHECK-NEXT: addl $4, %r10d
-; CHECK-NEXT: shrq $6, %rdx
-; CHECK-NEXT: andl $67108860, %edx # imm = 0x3FFFFFC
-; CHECK-NEXT: movl (%rdi,%rdx), %edx
+; CHECK-NEXT: movl (%rdi,%rdx,4), %edx
; CHECK-NEXT: movzbl %dl, %edi
; CHECK-NEXT: shrl $8, %edx
; CHECK-NEXT: addl $5, %esi
define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: shll $4, %edi
; CHECK-NEXT: andl $48, %edi
; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: shrq $4, %rax
-; CHECK-NEXT: orq %rdi, %rax
+; CHECK-NEXT: shrl $4, %eax
+; CHECK-NEXT: orl %edi, %eax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
; X64-LABEL: shift30_and2_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq $30, %rax
-; X64-NEXT: andl $2, %eax
+; X64-NEXT: shrl $30, %eax
+; X64-NEXT: andl $-2, %eax
; X64-NEXT: retq
%shr = lshr i64 %x, 30
%and = and i64 %shr, 2
; CHECK64-LABEL: not_bswap:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzwl var16(%rip), %eax
-; CHECK64-NEXT: movq %rax, %rcx
-; CHECK64-NEXT: shrq $8, %rcx
+; CHECK64-NEXT: movl %eax, %ecx
+; CHECK64-NEXT: shrl $8, %ecx
; CHECK64-NEXT: shlq $8, %rax
; CHECK64-NEXT: orq %rcx, %rax
; CHECK64-NEXT: retq
;
; CHECK64-LABEL: finally_useful_bswap:
; CHECK64: # %bb.0:
-; CHECK64-NEXT: movzwl var16(%rip), %eax
-; CHECK64-NEXT: bswapq %rax
-; CHECK64-NEXT: shrq $48, %rax
+; CHECK64-NEXT: movzwl var16(%rip), %ecx
+; CHECK64-NEXT: movzbl %cl, %eax
+; CHECK64-NEXT: # kill: def $ecx killed $ecx killed $rcx def $rcx
+; CHECK64-NEXT: shrl $8, %ecx
+; CHECK64-NEXT: shlq $8, %rax
+; CHECK64-NEXT: orq %rcx, %rax
; CHECK64-NEXT: retq
%init = load i16, ptr @var16
%big = zext i16 %init to i64
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $235867919, %ecx # imm = 0xE0F0F0F
; X64-NEXT: shlq $4, %rcx
-; X64-NEXT: shrq $4, %rax
+; X64-NEXT: shrl $4, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: orq %rcx, %rax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $590558003, %ecx # imm = 0x23333333
-; X64-NEXT: shrq $2, %rax
+; X64-NEXT: shrl $2, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: leaq (%rax,%rcx,4), %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: leaq (%rax,%rdx,2), %rax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: andl $357913941, %ecx # imm = 0x15555555
+; X64-NEXT: shrl %eax
+; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-NEXT: leaq (%rax,%rcx,2), %rax
; X64-NEXT: retq
%1 = call i64 @llvm.bitreverse.i64(i64 %a)
%2 = shl i64 %1, 33
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE
-; X64-NEXT: shrq %rax
+; X64-NEXT: shrl %eax
; X64-NEXT: retq
%t0 = and i64 %a0, 2147483647
%t1 = lshr i64 %t0, 1
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE
-; X64-NEXT: shrq %rax
+; X64-NEXT: shrl %eax
; X64-NEXT: retq
%t0 = and i64 %a0, 2147483647
%t1 = ashr i64 %t0, 1
;
; X64-NOBMI-LABEL: pr38938:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movq (%rsi), %rax
-; X64-NOBMI-NEXT: shrq $19, %rax
-; X64-NOBMI-NEXT: andl $4092, %eax # imm = 0xFFC
-; X64-NOBMI-NEXT: incl (%rdi,%rax)
+; X64-NOBMI-NEXT: movl (%rsi), %eax
+; X64-NOBMI-NEXT: shrl $21, %eax
+; X64-NOBMI-NEXT: andl $1023, %eax # imm = 0x3FF
+; X64-NOBMI-NEXT: incl (%rdi,%rax,4)
; X64-NOBMI-NEXT: retq
;
; X64-BMINOTBM-LABEL: pr38938:
; X64-BMINOTBM: # %bb.0:
; X64-BMINOTBM-NEXT: movl $2581, %eax # imm = 0xA15
-; X64-BMINOTBM-NEXT: bextrq %rax, (%rsi), %rax
+; X64-BMINOTBM-NEXT: bextrl %eax, (%rsi), %eax
; X64-BMINOTBM-NEXT: incl (%rdi,%rax,4)
; X64-BMINOTBM-NEXT: retq
;
; X64-BMITBM-LABEL: pr38938:
; X64-BMITBM: # %bb.0:
-; X64-BMITBM-NEXT: bextrq $2581, (%rsi), %rax # imm = 0xA15
+; X64-BMITBM-NEXT: bextrl $2581, (%rsi), %eax # imm = 0xA15
; X64-BMITBM-NEXT: incl (%rdi,%rax,4)
; X64-BMITBM-NEXT: retq
%tmp = load i64, ptr %a1, align 8
define void @bar64(i64 inreg %x, ptr inreg %p) nounwind {
; X64-LABEL: bar64:
; X64: # %bb.0:
-; X64-NEXT: shrq $8, %rdi
+; X64-NEXT: shrl $8, %edi
; X64-NEXT: incb %dil
; X64-NEXT: movb %dil, (%rsi)
; X64-NEXT: retq
;
; X32-LABEL: bar64:
; X32: # %bb.0:
-; X32-NEXT: shrq $8, %rdi
+; X32-NEXT: shrl $8, %edi
; X32-NEXT: incb %dil
; X32-NEXT: movb %dil, (%esi)
; X32-NEXT: retq
;
; WIN64-LABEL: bar64:
; WIN64: # %bb.0:
-; WIN64-NEXT: shrq $8, %rcx
+; WIN64-NEXT: shrl $8, %ecx
; WIN64-NEXT: incb %cl
; WIN64-NEXT: movb %cl, (%rdx)
; WIN64-NEXT: retq
; X64-BSR-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
; X64-BSR: # %bb.0:
; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: shrq $6, %rax
+; X64-BSR-NEXT: shrl $6, %eax
; X64-BSR-NEXT: cmpl $1, %eax
; X64-BSR-NEXT: sete %al
; X64-BSR-NEXT: retq
; X64-LZCNT-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
; X64-LZCNT: # %bb.0:
; X64-LZCNT-NEXT: lzcntq %rdi, %rax
-; X64-LZCNT-NEXT: shrq $6, %rax
+; X64-LZCNT-NEXT: shrl $6, %eax
; X64-LZCNT-NEXT: cmpl $1, %eax
; X64-LZCNT-NEXT: sete %al
; X64-LZCNT-NEXT: retq
; X64-BSR-LABEL: lshr_ctlz_undef_cmpne_zero_i64:
; X64-BSR: # %bb.0:
; X64-BSR-NEXT: bsrq %rdi, %rax
-; X64-BSR-NEXT: testq $-64, %rax
+; X64-BSR-NEXT: testl $-64, %eax
; X64-BSR-NEXT: setne %al
; X64-BSR-NEXT: retq
;
;
; X64-LABEL: test4:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: shrq $2, %rax
-; X64-NEXT: andl $60, %eax
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: shrl $2, %eax
+; X64-NEXT: andl $-4, %eax
; X64-NEXT: retq
entry:
%bf.load = load i8, ptr %data, align 4
; X64-LABEL: test5:
; X64: # %bb.0: # %entry
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: shrq $2, %rax
+; X64-NEXT: shrl $2, %eax
; X64-NEXT: xorq $60, %rax
; X64-NEXT: retq
entry:
; X64-LABEL: test6:
; X64: # %bb.0: # %entry
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: shrq $2, %rax
+; X64-NEXT: shrl $2, %eax
; X64-NEXT: orq $60, %rax
; X64-NEXT: retq
entry:
; X64-LABEL: i64_zext_shift_i16_zext_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: shrq $5, %rax
+; X64-NEXT: shrl $5, %eax
; X64-NEXT: retq
%t0 = zext i8 %a0 to i16
%t1 = lshr i16 %t0, 5
; X64-LABEL: i128_zext_shift_i64_zext_i8:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: shrq $4, %rax
+; X64-NEXT: shrl $4, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: retq
%t0 = zext i8 %a0 to i64
; X64-LABEL: i128_zext_shift_i64_zext_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: shrq $7, %rax
+; X64-NEXT: shrl $7, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: retq
%t0 = zext i16 %a0 to i64