// value for the comparison. When selecting through a .td file, a type
// error is raised. Must check this first so we never break on the
// !Subtarget->isISA3_1() check.
- if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) {
+ auto IntID = N->getConstantOperandVal(0); // Intrinsic ID (constant operand 0), read once and reused by the checks below.
+ if (IntID == Intrinsic::ppc_fsels) {
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
return;
}
+ if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) { // Hand-select the BCD predicate intrinsics: emit the record-form BCD op, then turn one CR6 bit into an i32.
+ auto Pred = N->getConstantOperandVal(1); // Constant operand 1 chooses which CR6 bit is read and whether it is inverted.
+ unsigned Opcode =
+ IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec; // Record-form opcode matching the intrinsic.
+ unsigned SubReg = 0; // CR-bit subregister index; only used on the ISA 3.1 path.
+ unsigned ShiftVal = 0; // Feeds the RLWINM rotate amount; only used on the pre-ISA-3.1 path.
+ bool Reverse = false; // True when the selected bit has to be inverted.
+ switch (Pred) { // NOTE(review): no default case - a Pred outside 0-7 leaves SubReg and ShiftVal at 0; confirm the range is enforced elsewhere.
+ case 0: // eq bit.
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ break;
+ case 1: // eq bit, inverted.
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ Reverse = true;
+ break;
+ case 2: // lt bit.
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ break;
+ case 3: // lt bit, inverted.
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ Reverse = true;
+ break;
+ case 4: // gt bit.
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ break;
+ case 5: // gt bit, inverted.
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ Reverse = true;
+ break;
+ case 6: // un bit; ShiftVal intentionally stays 0.
+ SubReg = PPC::sub_un;
+ break;
+ case 7: // un bit, inverted; ShiftVal intentionally stays 0.
+ SubReg = PPC::sub_un;
+ Reverse = true;
+ break;
+ }
+
+ EVT VTs[] = {MVT::v16i8, MVT::Glue}; // Result 0 is the vector value; result 1 is the glue consumed by the CR6 read below.
+ SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
+ CurDAG->getTargetConstant(0, dl, MVT::i32)}; // The two vector operands plus a constant 0 as the instruction's last (PS) operand.
+ SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ // On Power10, we can use SETBC[R]. On prior architectures, we have to use
+ // MFOCRF and shift/negate the value.
+ if (Subtarget->isISA3_1()) {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, BCDOp.getValue(1)),
+ 0); // The chosen CR6 bit as an i1, glued to the BCD op.
+ CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
+ CRBit); // SETBCR is used for the inverted predicates.
+ } else {
+ SDValue Move =
+ SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
+ BCDOp.getValue(1)),
+ 0); // Read CR6 into an i32 via MFOCRF, glued to the BCD op.
+ SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl), // NOTE(review): shadows the outer Ops. Presumably 4 + ShiftVal is the bit's distance from the low-order end after MFOCRF - confirm.
+ getI32Imm(31, dl), getI32Imm(31, dl)}; // Both mask bounds are 31, so a single bit of the rotated value is kept.
+ if (!Reverse)
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ else {
+ SDValue Shift = SDValue(
+ CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl)); // XOR with 1 flips the extracted bit for inverted predicates.
+ }
+ }
+ return; // N has been replaced on both paths; nothing further to select here.
+ }
+
if (!Subtarget->isISA3_1())
break;
unsigned Opcode = 0;
- switch (N->getConstantOperandVal(0)) {
+ switch (IntID) {
default:
break;
case Intrinsic::ppc_altivec_vstribr_p:
} // end HasAltivec
+// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
+class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo,
+ (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS), // PS is an explicit immediate operand.
+ !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
+ let Defs = [CR6]; // Instructions of this class implicitly define CR6.
+}
+
+// [PO VRT VRA VRB 1 / XO]
+class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), // No PS operand in the operand list.
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
+ let Defs = [CR6]; // Instructions of this class implicitly define CR6.
+ let PS = 0; // PS is not an operand here, so the field is fixed to 0.
+}
+
def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
v2i64, v4i32>;
+def BCDADD_rec : VX_VT5_VA5_VB5_PS1_XO9_o<1, "bcdadd." , []>; // Empty pattern list; int_ppc_bcdadd is matched by the Pat below.
+def BCDSUB_rec : VX_VT5_VA5_VB5_PS1_XO9_o<65, "bcdsub." , []>; // Empty pattern list; int_ppc_bcdsub is matched by the Pat below.
+
+def : Pat<(v16i8 (int_ppc_bcdadd v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDADD_rec $vA, $vB, $PS)>; // The PS immediate is passed through unchanged.
+def : Pat<(v16i8 (int_ppc_bcdsub v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDSUB_rec $vA, $vB, $PS)>; // The PS immediate is passed through unchanged.
// Shuffle patterns for unary and swapped (LE) vector pack modulo.
def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
-// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
-class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo,
- (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
- let Defs = [CR6];
-}
-
-// [PO VRT VRA VRB 1 / XO]
-class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
- let Defs = [CR6];
- let PS = 0;
-}
-
// Decimal Shift/Unsigned-Shift/Shift-and-Round
def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9
+
+define dso_local i64 @test_invalid(<16 x i8> %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_invalid:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v2, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_invalid:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v2, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 6, <16 x i8> %a, <16 x i8> %a) #2 ; Predicate 6 reads the 'un' bit of cr6 as-is; the same vector is passed as both inputs.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local <16 x i8> @test_add(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdadd. v2, v2, v3, 1
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_add:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdadd. v2, v2, v3, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.bcdadd(<16 x i8> %a, <16 x i8> %b, i32 1) ; Non-predicate form: the i32 1 becomes the instruction's last operand; %ps is unused.
+ ret <16 x i8> %0
+}
+
+define dso_local i64 @test_add_ofl(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_add_ofl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdadd. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_add_ofl:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdadd. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdadd.p(i32 6, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 6 reads the 'un' bit of cr6 as-is; %ps is unused.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local <16 x i8> @test_sub(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_sub:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.bcdsub(<16 x i8> %a, <16 x i8> %b, i32 0) ; Non-predicate form: the i32 0 becomes the instruction's last operand; %ps is unused.
+ ret <16 x i8> %0
+}
+
+define dso_local i64 @test_sub_ofl(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sub_ofl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_sub_ofl:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 6, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 6 reads the 'un' bit of cr6 as-is; %ps is unused.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmplt(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmplt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+lt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmplt:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 25, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 2, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 2 reads the 'lt' bit of cr6 as-is.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpgt(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpgt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+gt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpgt:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 26, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 4, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 4 reads the 'gt' bit of cr6 as-is.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpeq(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpeq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+eq
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpeq:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 27, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 0, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 0 reads the 'eq' bit of cr6 as-is.
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpge(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpge:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbcr r3, 4*cr6+lt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpge:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 25, 31, 31
+; CHECK-P9-NEXT: xori r3, r3, 1
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 3, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 3 reads the 'lt' bit of cr6 inverted (setbcr on pwr10, extra xori on pwr9).
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmple(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmple:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbcr r3, 4*cr6+gt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmple:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 26, 31, 31
+; CHECK-P9-NEXT: xori r3, r3, 1
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 5, <16 x i8> %a, <16 x i8> %b) #2 ; Predicate 5 reads the 'gt' bit of cr6 inverted (setbcr on pwr10, extra xori on pwr9).
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+declare i32 @llvm.ppc.bcdsub.p(i32 immarg, <16 x i8>, <16 x i8>) #1
+declare i32 @llvm.ppc.bcdadd.p(i32 immarg, <16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.ppc.bcdadd(<16 x i8>, <16 x i8>, i32 immarg) #1
+declare <16 x i8> @llvm.ppc.bcdsub(<16 x i8>, <16 x i8>, i32 immarg) #1