MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
+ MAKE_CASE(AArch64ISD::PTEST_ANY)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
}
// Set condition code (CC) flags.
- SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
+ SDValue Test = DAG.getNode(
+ Cond == AArch64CC::ANY_ACTIVE ? AArch64ISD::PTEST_ANY : AArch64ISD::PTEST,
+ DL, MVT::Other, Pg, Op);
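+ // For example (illustrative): for Cond == AArch64CC::ANY_ACTIVE this
+ // builds PTEST_ANY(Pg, Op) rather than PTEST(Pg, Op); the node records
+ // that only the 'any active' flag is consumed, so a later peephole can
+ // remove the test when the producer of Op already sets NZCV.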
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
INSR,
PTEST,
+ PTEST_ANY,
PTRUE,
BITREVERSE_MERGE_PASSTHRU,
default:
break;
case AArch64::PTEST_PP:
+ case AArch64::PTEST_PP_ANY:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
// The compare mask and value are not meaningful for PTEST; use don't-care defaults.
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
// PTEST_LIKE instruction uses the same all active mask and the element
- // size matches.
+ // size matches. If the PTEST uses the 'any' condition, it is always
+ // redundant.
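+ // An illustrative example of this pattern (byte elements, so the element
+ // size trivially matches):
+ //   ptrue p0.b                   ; all active mask
+ //   cmpeq p1.b, p0/z, z0.b, #0   ; implicit PTEST already set NZCV
+ //   ptest p0, p1.b               ; redundant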
if (PredIsPTestLike) {
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PTestLikeMask)
+ if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
return false;
}
// Fallthrough to simply remove the PTEST.
+ } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
+            PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
+ // For PTEST(PG, PG), the PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would. This is only valid for
+ // the 'any' condition.
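+ // An illustrative example, with a flag-setting while instruction as the
+ // producer:
+ //   whilelo p0.s, x0, x1   ; implicitly sets NZCV based on p0
+ //   ptest   p0, p0.b       ; redundant when only 'any' is read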
+
+ // Fallthrough to simply remove the PTEST.
} else if (PredIsPTestLike) {
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
// on 8-bit predicates as the flag-setting behaviour depends on the
// element size. Consider, for example, the case where the compare
// generates a canonical all active 32-bit predicate
// (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
// active flag, whereas the PTEST instruction with the same mask doesn't.
+ // For PTEST_ANY this doesn't apply as the flags in this case would be
+ // identical regardless of element size.
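+ // For illustration (assuming a 128-bit vector): the canonical all active
+ // 32-bit predicate is 0b0001000100010001, so its last 32-bit element is
+ // active but its last byte lane is not, and a byte-granular PTEST would
+ // compute a different 'last active' flag.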
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
if ((Mask != PTestLikeMask) ||
- (PredElementSize != AArch64::ElementSizeB))
+ (PredElementSize != AArch64::ElementSizeB &&
+ PTest->getOpcode() != AArch64::PTEST_PP_ANY))
return false;
// Fallthrough to simply remove the PTEST.
return true;
}
- if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
+ if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
+ CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
if (SrcReg2 != 0)
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
+def AArch64ptest_any : SDNode<"AArch64ISD::PTEST_ANY", SDT_AArch64PTest>;
def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
defm BRKB_PPmP : sve_int_break_m<0b101, "brkb", int_aarch64_sve_brkb>;
defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
- def PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest>;
- defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
- defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
- defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
+ defm PTEST_PP : sve_int_ptest<0b010000, "ptest", AArch64ptest, AArch64ptest_any>;
+ defm PFALSE : sve_int_pfalse<0b000000, "pfalse">;
+ defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
+ defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
defm AND_PPzPP : sve_int_pred_log_v2<0b0000, "and", int_aarch64_sve_and_z, and>;
defm BIC_PPzPP : sve_int_pred_log_v2<0b0001, "bic", int_aarch64_sve_bic_z, AArch64bic>;
let isCompare = 1;
}
+multiclass sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op,
+ SDPatternOperator op_any> {
+ def NAME : sve_int_ptest<opc, asm, op>;
+
+ let hasNoSchedulingInfo = 1, isCompare = 1, Defs = [NZCV] in {
+ def _ANY : Pseudo<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
+ [(op_any (nxv16i1 PPRAny:$Pg), (nxv16i1 PPR8:$Pn))]>,
+ PseudoInstExpansion<(!cast<Instruction>(NAME) PPRAny:$Pg, PPR8:$Pn)>;
+ }
+}
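+// Sketch of the resulting expansion (as implied by the PseudoInstExpansion
+// above): PTEST_PP_ANY $Pg, $Pn lowers to PTEST_PP $Pg, $Pn when emitting
+// MC. Both define NZCV; the pseudo exists only so the PTEST peephole knows
+// that just the 'any' condition is read.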
+
class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
PPRRegOp pprty>
: I<(outs pprty:$Pdn), (ins PPRAny:$Pg, pprty:$_Pdn),
define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpeq_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpeq_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpeq_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpeq_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpge_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpge_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpge_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpge_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpgt_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpgt_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpgt_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpgt_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphi_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmphi_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphi_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphi_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmphs_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmphs_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmphs p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphs_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphs p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmphs_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmphs p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmphs p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmple_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmple p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmple p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmple_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmple p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmple p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i1 @cmp32_ptest_any_px(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_px:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpge p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
; CHECK-LABEL: cmp8_ptest_any_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: cmp32_ptest_any_xx:
; CHECK: // %bb.0:
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cmp8_ptest_any_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmp32_ptest_any_ax:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmplo_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmplo_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmplo p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplo_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmplo p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmplo_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplo_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplo p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmplo p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmpls_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpls_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpls p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpls_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpls p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmpls_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpls_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpls p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpls p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmplt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmplt_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplt_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmplt p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmplt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmplt_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmplt p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmplt p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cmpne_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
define i32 @cmpne_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: cmpne_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpne_wide_nxv8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
define i32 @cmpne_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cmpne_wide_nxv4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z1.d
-; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z1.d
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
define i32 @match_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: match_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: match p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
define i32 @nmatch_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: nmatch_imm_nxv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: nmatch p0.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
; CHECK-LABEL: pnext_2:
; CHECK: // %bb.0:
; CHECK-NEXT: pnext p1.d, p0, p1.d
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.pnext.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a)
; CHECK-LABEL: pnext_4:
; CHECK: // %bb.0:
; CHECK-NEXT: pnext p1.s, p0, p1.s
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.pnext.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a)
; CHECK-LABEL: pnext_8:
; CHECK: // %bb.0:
; CHECK-NEXT: pnext p1.h, p0, p1.h
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.pnext.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a)
; CHECK-LABEL: sve_cmplt_setcc:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: b.eq .LBB0_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-LABEL: sve_cmplt_setcc_inverted:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmplt p1.h, p0/z, z0.h, #0
-; CHECK-NEXT: ptest p0, p1.b
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: st1h { z0.h }, p0, [x0]