Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
break;
}
+ case RISCVISD::SHFLI: {
+ // There is no SHFLIW instruction, but we can just promote the operation.
+ // Any shuffle control matched for i32 stays below bit 4, so the promoted
+ // i64 SHFLI shuffles within 32-bit halves and the low 32 bits of the result
+ // depend only on the low 32 bits of the any-extended input.
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ SDLoc DL(N);
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewRes =
+ DAG.getNode(RISCVISD::SHFLI, DL, MVT::i64, NewOp0, N->getOperand(1));
+ // ReplaceNodeResults requires we maintain the same type for the return
+ // value.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ break;
+ }
case ISD::BSWAP:
case ISD::BITREVERSE: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
}
};
-// Matches any of the following bit-manipulation patterns:
-// (and (shl x, 1), (0x55555555 << 1))
-// (and (srl x, 1), 0x55555555)
-// (shl (and x, 0x55555555), 1)
-// (srl (and x, (0x55555555 << 1)), 1)
-// where the shift amount and mask may vary thus:
-// [1] = 0x55555555 / 0xAAAAAAAA
-// [2] = 0x33333333 / 0xCCCCCCCC
-// [4] = 0x0F0F0F0F / 0xF0F0F0F0
-// [8] = 0x00FF00FF / 0xFF00FF00
-// [16] = 0x0000FFFF / 0xFFFFFFFF
-// [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
-static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
+// Matches patterns of the form
+// (and (shl x, C2), (C1 << C2))
+// (and (srl x, C2), C1)
+// (shl (and x, C1), C2)
+// (srl (and x, (C1 << C2)), C2)
+// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
+// The expected masks for each shift amount are specified in BitmanipMasks where
+// BitmanipMasks[log2(C2)] specifies the expected C1 value.
+// The maximum allowed shift amount is XLen/2 when BitmanipMasks contains 6
+// entries and XLen/4 when it contains 5, assuming the maximum possible XLen
+// is 64.
+static Optional<RISCVBitmanipPat>
+matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
+ assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
+ "Unexpected number of masks");
Optional<uint64_t> Mask;
// Optionally consume a mask around the shift operation.
if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return None;
- auto ShAmt = Op.getConstantOperandVal(1);
+ uint64_t ShAmt = Op.getConstantOperandVal(1);
- if (!isPowerOf2_64(ShAmt))
+ unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
+ if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
return None;
-
- // These are the unshifted masks which we use to match bit-manipulation
- // patterns. They may be shifted left in certain circumstances.
- static const uint64_t BitmanipMasks[] = {
- 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
- 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
- };
-
- unsigned MaskIdx = Log2_64(ShAmt);
- if (MaskIdx >= array_lengthof(BitmanipMasks))
+ // If we don't have enough masks for 64 bits, then we must be trying to
+ // match SHFL, so we're only allowed to shift 1/4 of the width.
+ if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
return None;
- auto Src = Op.getOperand(0);
-
- unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
- auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+ SDValue Src = Op.getOperand(0);
// The expected mask is shifted left when the AND is found around SHL
// patterns.
}
}
+ unsigned MaskIdx = Log2_32(ShAmt);
+ uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+
if (SHLExpMask)
ExpMask <<= ShAmt;
return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
+// Matches any of the following bit-manipulation patterns:
+// (and (shl x, 1), (0x55555555 << 1))
+// (and (srl x, 1), 0x55555555)
+// (shl (and x, 0x55555555), 1)
+// (srl (and x, (0x55555555 << 1)), 1)
+// where the shift amount and mask may vary thus:
+// [1] = 0x55555555 / 0xAAAAAAAA
+// [2] = 0x33333333 / 0xCCCCCCCC
+// [4] = 0x0F0F0F0F / 0xF0F0F0F0
+// [8] = 0x00FF00FF / 0xFF00FF00
+// [16] = 0x0000FFFF / 0xFFFF0000
+// [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
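+// For illustration, the [1] stage written as C (a sketch; grev1 is an
+// illustrative name, not part of this patch):
+//   uint32_t grev1(uint32_t x) {
+//     return ((x << 1) & 0xAAAAAAAA) | ((x >> 1) & 0x55555555);
+//   }
+// matchGREVIPat recognizes each side of the outer OR.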
+static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
+ // These are the unshifted masks which we use to match bit-manipulation
+ // patterns. They may be shifted left in certain circumstances.
+ static const uint64_t BitmanipMasks[] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+ 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+
+ return matchRISCVBitmanipPat(Op, BitmanipMasks);
+}
+
// Match the following pattern as a GREVI(W) operation
// (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
EVT VT = Op.getValueType();
if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
- auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
- auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
+ auto LHS = matchGREVIPat(Op.getOperand(0));
+ auto RHS = matchGREVIPat(Op.getOperand(1));
if (LHS && RHS && LHS->formsPairWith(*RHS)) {
SDLoc DL(Op);
return DAG.getNode(
// 4. (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
EVT VT = Op.getValueType();
if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
return SDValue();
SDValue OrOp0 = Op0.getOperand(0);
SDValue OrOp1 = Op0.getOperand(1);
- auto LHS = matchRISCVBitmanipPat(OrOp0);
+ auto LHS = matchGREVIPat(OrOp0);
// OR is commutable so swap the operands and try again: x might have been
// on the left
if (!LHS) {
std::swap(OrOp0, OrOp1);
- LHS = matchRISCVBitmanipPat(OrOp0);
+ LHS = matchGREVIPat(OrOp0);
}
- auto RHS = matchRISCVBitmanipPat(Op1);
+ auto RHS = matchGREVIPat(Op1);
if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
return DAG.getNode(
RISCVISD::GORCI, DL, VT, LHS->Op,
return SDValue();
}
+// Matches any of the following bit-manipulation patterns:
+// (and (shl x, 1), (0x22222222 << 1))
+// (and (srl x, 1), 0x22222222)
+// (shl (and x, 0x22222222), 1)
+// (srl (and x, (0x22222222 << 1)), 1)
+// where the shift amount and mask may vary thus:
+// [1] = 0x22222222 / 0x44444444
+// [2] = 0x0C0C0C0C / 0x30303030
+// [4] = 0x00F000F0 / 0x0F000F00
+// [8] = 0x0000FF00 / 0x00FF0000
+// [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
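+// For illustration, the [1] stage (zip.n) written as C (a sketch; shfl1 is
+// an illustrative name, not part of this patch):
+//   uint32_t shfl1(uint32_t x) {
+//     return (x & 0x99999999) | ((x << 1) & 0x44444444) |
+//            ((x >> 1) & 0x22222222);
+//   }
+// combineORToSHFL below matches this three-term OR.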
+static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
+ // These are the unshifted masks which we use to match bit-manipulation
+ // patterns. They may be shifted left in certain circumstances.
+ static const uint64_t BitmanipMasks[] = {
+ 0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
+ 0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
+
+ return matchRISCVBitmanipPat(Op, BitmanipMasks);
+}
+
+// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
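+// e.g. the [8] stage on RV32 has the shape
+//   (or (or (and (shl x, 8), 0x00FF0000),
+//           (and (srl x, 8), 0x0000FF00)),
+//       (and x, 0xFF0000FF))
+// though the inner OR may pair any two of the three terms.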
+static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
+ EVT VT = Op.getValueType();
+
+ if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // OR is commutable, so canonicalize the inner OR to the LHS.
+ if (Op0.getOpcode() != ISD::OR)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // We found an inner OR, so our operands are the operands of the inner OR
+ // and the other operand of the outer OR.
+ SDValue A = Op0.getOperand(0);
+ SDValue B = Op0.getOperand(1);
+ SDValue C = Op1;
+
+ auto Match1 = matchSHFLPat(A);
+ auto Match2 = matchSHFLPat(B);
+
+ // If neither matched, we failed.
+ if (!Match1 && !Match2)
+ return SDValue();
+
+ // We had at least one match. If one failed, try the remaining C operand.
+ if (!Match1) {
+ std::swap(A, C);
+ Match1 = matchSHFLPat(A);
+ if (!Match1)
+ return SDValue();
+ } else if (!Match2) {
+ std::swap(B, C);
+ Match2 = matchSHFLPat(B);
+ if (!Match2)
+ return SDValue();
+ }
+ assert(Match1 && Match2);
+
+ // Make sure our matches pair up.
+ if (!Match1->formsPairWith(*Match2))
+ return SDValue();
+
+ // All that remains is to make sure C is an AND with the same input that
+ // masks out the bits that are being shuffled.
+ if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
+ C.getOperand(0) != Match1->Op)
+ return SDValue();
+
+ uint64_t Mask = C.getConstantOperandVal(1);
+
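+ // The unshifted masks of the bits that stay in place for each stage, i.e.
+ // the complement of (C1 | (C1 << C2)).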
+ static const uint64_t BitmanipMasks[] = {
+ 0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
+ 0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
+ };
+
+ unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
+ unsigned MaskIdx = Log2_32(Match1->ShAmt);
+ uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
+
+ if (Mask != ExpMask)
+ return SDValue();
+
+ SDLoc DL(Op);
+ return DAG.getNode(
+ RISCVISD::SHFLI, DL, VT, Match1->Op,
+ DAG.getTargetConstant(Match1->ShAmt, DL, Subtarget.getXLenVT()));
+}
+
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
return GREV;
if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
return GORC;
+ if (auto SHFL = combineORToSHFL(SDValue(N, 0), DCI.DAG, Subtarget))
+ return SHFL;
break;
case RISCVISD::SELECT_CC: {
// Transform
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
return 33;
+ case RISCVISD::SHFLI: {
+ // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
+ // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
+ // will stay within the upper 32 bits. If there were more than 32 sign bits
+ // before, there will be at least 33 sign bits after.
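+ // For example, (SHFLI (sext_inreg x, i32), 8) keeps at least 33 sign bits,
+ // allowing a sext.w that follows the shuffle to be folded away.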
+ if (Op.getValueType() == MVT::i64 &&
+ (Op.getConstantOperandVal(1) & 0x10) == 0) {
+ unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp > 32)
+ return 33;
+ }
+ break;
+ }
case RISCVISD::VMV_X_S:
// The number of sign bits of the scalar result is computed by obtaining the
// element type of the input vector operand, subtracting its width from the
NODE_NAME_CASE(GREVIW)
NODE_NAME_CASE(GORCI)
NODE_NAME_CASE(GORCIW)
+ NODE_NAME_CASE(SHFLI)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
GREVIW,
GORCI,
GORCIW,
+ SHFLI,
// Vector Extension
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation.
def riscv_greviw : SDNode<"RISCVISD::GREVIW", SDTIntBinOp, []>;
def riscv_gorci : SDNode<"RISCVISD::GORCI", SDTIntBinOp, []>;
def riscv_gorciw : SDNode<"RISCVISD::GORCIW", SDTIntBinOp, []>;
+def riscv_shfli : SDNode<"RISCVISD::SHFLI", SDTIntBinOp, []>;
let Predicates = [HasStdExtZbp] in {
+def : Pat<(riscv_shfli GPR:$rs1, timm:$shamt), (SHFLI GPR:$rs1, timm:$shamt)>;
def : Pat<(riscv_grevi GPR:$rs1, timm:$shamt), (GREVI GPR:$rs1, timm:$shamt)>;
def : Pat<(riscv_gorci GPR:$rs1, timm:$shamt), (GORCI GPR:$rs1, timm:$shamt)>;
def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV64 GPR:$rs)>;
}
-let Predicates = [HasStdExtZbp, IsRV32] in {
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
- (and GPR:$rs1, (i32 0xFF0000FF))),
- (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))),
- (SHFLI GPR:$rs1, (i32 8))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)),
- (and GPR:$rs1, (i32 0xF00FF00F))),
- (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))),
- (SHFLI GPR:$rs1, (i32 4))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)),
- (and GPR:$rs1, (i32 0xC3C3C3C3))),
- (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))),
- (SHFLI GPR:$rs1, (i32 2))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)),
- (and GPR:$rs1, (i32 0x99999999))),
- (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))),
- (SHFLI GPR:$rs1, (i32 1))>;
-} // Predicates = [HasStdExtZbp, IsRV32]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)),
- (and GPR:$rs1, (i64 0xFFFF00000000FFFF))),
- (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))),
- (SHFLI GPR:$rs1, (i64 16))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)),
- (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))),
- (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))),
- (SHFLI GPR:$rs1, (i64 8))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)),
- (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))),
- (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))),
- (SHFLI GPR:$rs1, (i64 4))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)),
- (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))),
- (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))),
- (SHFLI GPR:$rs1, (i64 2))>;
-def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
- (and GPR:$rs1, (i64 0x9999999999999999))),
- (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))),
- (SHFLI GPR:$rs1, (i64 1))>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
let Predicates = [HasStdExtZba] in {
def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), GPR:$rs2),
(SH1ADD GPR:$rs1, GPR:$rs2)>;
;
; RV64IB-LABEL: shfl1_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: lui a1, 629146
-; RV64IB-NEXT: addiw a1, a1, -1639
-; RV64IB-NEXT: and a1, a0, a1
-; RV64IB-NEXT: slli a2, a0, 1
-; RV64IB-NEXT: lui a3, 279620
-; RV64IB-NEXT: addiw a3, a3, 1092
-; RV64IB-NEXT: and a2, a2, a3
-; RV64IB-NEXT: or a1, a2, a1
-; RV64IB-NEXT: srli a0, a0, 1
-; RV64IB-NEXT: lui a2, 139810
-; RV64IB-NEXT: addiw a2, a2, 546
-; RV64IB-NEXT: and a0, a0, a2
-; RV64IB-NEXT: or a0, a1, a0
+; RV64IB-NEXT: zip.n a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl1_i32:
; RV64IBP: # %bb.0:
-; RV64IBP-NEXT: lui a1, 629146
-; RV64IBP-NEXT: addiw a1, a1, -1639
-; RV64IBP-NEXT: and a1, a0, a1
-; RV64IBP-NEXT: slli a2, a0, 1
-; RV64IBP-NEXT: lui a3, 279620
-; RV64IBP-NEXT: addiw a3, a3, 1092
-; RV64IBP-NEXT: and a2, a2, a3
-; RV64IBP-NEXT: or a1, a2, a1
-; RV64IBP-NEXT: srli a0, a0, 1
-; RV64IBP-NEXT: lui a2, 139810
-; RV64IBP-NEXT: addiw a2, a2, 546
-; RV64IBP-NEXT: and a0, a0, a2
-; RV64IBP-NEXT: or a0, a1, a0
+; RV64IBP-NEXT: zip.n a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -1717986919
%shl = shl i32 %a, 1
;
; RV64IB-LABEL: shfl2_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: lui a1, 801852
-; RV64IB-NEXT: addiw a1, a1, 963
-; RV64IB-NEXT: and a1, a0, a1
-; RV64IB-NEXT: slli a2, a0, 2
-; RV64IB-NEXT: lui a3, 197379
-; RV64IB-NEXT: addiw a3, a3, 48
-; RV64IB-NEXT: and a2, a2, a3
-; RV64IB-NEXT: or a1, a2, a1
-; RV64IB-NEXT: srli a0, a0, 2
-; RV64IB-NEXT: lui a2, 49345
-; RV64IB-NEXT: addiw a2, a2, -1012
-; RV64IB-NEXT: and a0, a0, a2
-; RV64IB-NEXT: or a0, a0, a1
+; RV64IB-NEXT: zip2.b a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl2_i32:
; RV64IBP: # %bb.0:
-; RV64IBP-NEXT: lui a1, 801852
-; RV64IBP-NEXT: addiw a1, a1, 963
-; RV64IBP-NEXT: and a1, a0, a1
-; RV64IBP-NEXT: slli a2, a0, 2
-; RV64IBP-NEXT: lui a3, 197379
-; RV64IBP-NEXT: addiw a3, a3, 48
-; RV64IBP-NEXT: and a2, a2, a3
-; RV64IBP-NEXT: or a1, a2, a1
-; RV64IBP-NEXT: srli a0, a0, 2
-; RV64IBP-NEXT: lui a2, 49345
-; RV64IBP-NEXT: addiw a2, a2, -1012
-; RV64IBP-NEXT: and a0, a0, a2
-; RV64IBP-NEXT: or a0, a0, a1
+; RV64IBP-NEXT: zip2.b a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -1010580541
%shl = shl i32 %a, 2
;
; RV64IB-LABEL: shfl4_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: lui a1, 983295
-; RV64IB-NEXT: addiw a1, a1, 15
-; RV64IB-NEXT: and a1, a0, a1
-; RV64IB-NEXT: slli a2, a0, 4
-; RV64IB-NEXT: lui a3, 61441
-; RV64IB-NEXT: addiw a3, a3, -256
-; RV64IB-NEXT: and a2, a2, a3
-; RV64IB-NEXT: srli a0, a0, 4
-; RV64IB-NEXT: lui a3, 3840
-; RV64IB-NEXT: addiw a3, a3, 240
-; RV64IB-NEXT: and a0, a0, a3
-; RV64IB-NEXT: or a0, a0, a1
-; RV64IB-NEXT: or a0, a0, a2
+; RV64IB-NEXT: zip4.h a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl4_i32:
; RV64IBP: # %bb.0:
-; RV64IBP-NEXT: lui a1, 983295
-; RV64IBP-NEXT: addiw a1, a1, 15
-; RV64IBP-NEXT: and a1, a0, a1
-; RV64IBP-NEXT: slli a2, a0, 4
-; RV64IBP-NEXT: lui a3, 61441
-; RV64IBP-NEXT: addiw a3, a3, -256
-; RV64IBP-NEXT: and a2, a2, a3
-; RV64IBP-NEXT: srli a0, a0, 4
-; RV64IBP-NEXT: lui a3, 3840
-; RV64IBP-NEXT: addiw a3, a3, 240
-; RV64IBP-NEXT: and a0, a0, a3
-; RV64IBP-NEXT: or a0, a0, a1
-; RV64IBP-NEXT: or a0, a0, a2
+; RV64IBP-NEXT: zip4.h a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -267390961
%shl = shl i32 %a, 4
;
; RV64IB-LABEL: shfl8_i32:
; RV64IB: # %bb.0:
-; RV64IB-NEXT: lui a1, 1044480
-; RV64IB-NEXT: addiw a1, a1, 255
-; RV64IB-NEXT: and a1, a0, a1
-; RV64IB-NEXT: slli a2, a0, 8
-; RV64IB-NEXT: lui a3, 4080
-; RV64IB-NEXT: and a2, a2, a3
-; RV64IB-NEXT: srli a0, a0, 8
-; RV64IB-NEXT: lui a3, 16
-; RV64IB-NEXT: addiw a3, a3, -256
-; RV64IB-NEXT: and a0, a0, a3
-; RV64IB-NEXT: or a0, a1, a0
-; RV64IB-NEXT: or a0, a0, a2
+; RV64IB-NEXT: zip8.w a0, a0
; RV64IB-NEXT: ret
;
; RV64IBP-LABEL: shfl8_i32:
; RV64IBP: # %bb.0:
-; RV64IBP-NEXT: lui a1, 1044480
-; RV64IBP-NEXT: addiw a1, a1, 255
-; RV64IBP-NEXT: and a1, a0, a1
-; RV64IBP-NEXT: slli a2, a0, 8
-; RV64IBP-NEXT: lui a3, 4080
-; RV64IBP-NEXT: and a2, a2, a3
-; RV64IBP-NEXT: srli a0, a0, 8
-; RV64IBP-NEXT: lui a3, 16
-; RV64IBP-NEXT: addiw a3, a3, -256
-; RV64IBP-NEXT: and a0, a0, a3
-; RV64IBP-NEXT: or a0, a1, a0
-; RV64IBP-NEXT: or a0, a0, a2
+; RV64IBP-NEXT: zip8.w a0, a0
; RV64IBP-NEXT: ret
%and = and i32 %a, -16776961
%shl = shl i32 %a, 8