static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
IntrinsicInst &II) {
- IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
- IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
+ IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
+ IntrinsicInst *Op = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
- if (!Op1 || !Op2)
+ if (!Pg || !Op)
return None;
+ Intrinsic::ID OpIID = Op->getIntrinsicID();
+
IRBuilder<> Builder(II.getContext());
Builder.SetInsertPoint(&II);
- if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
- Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
- Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
- Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
- Type *Tys[] = {Op1->getArgOperand(0)->getType()};
+ if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
+ OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
+ Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) {
+ Value *Ops[] = {Pg->getArgOperand(0), Op->getArgOperand(0)};
+ Type *Tys[] = {Pg->getArgOperand(0)->getType()};
auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
// Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)).
// Later optimizations may rewrite sequence to use the flag-setting variant
// of instruction X to remove PTEST.
- if ((Op1 == Op2) &&
- (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
- ((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) ||
- (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) {
- Value *Ops[] = {Op1->getArgOperand(0), Op1};
- Type *Tys[] = {Op1->getType()};
+ if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
+ ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
+ (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
+ (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
+ (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
+ (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
+ (OpIID == Intrinsic::aarch64_sve_and_z) ||
+ (OpIID == Intrinsic::aarch64_sve_bic_z) ||
+ (OpIID == Intrinsic::aarch64_sve_eor_z) ||
+ (OpIID == Intrinsic::aarch64_sve_nand_z) ||
+ (OpIID == Intrinsic::aarch64_sve_nor_z) ||
+ (OpIID == Intrinsic::aarch64_sve_orn_z) ||
+ (OpIID == Intrinsic::aarch64_sve_orr_z))) {
+ Value *Ops[] = {Pg->getArgOperand(0), Pg};
+ Type *Tys[] = {Pg->getType()};
auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
PTest->takeName(&II);
ret i1 %out
}
-; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)).
-define i1 @ptest_any_brkb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
-; CHECK-LABEL: @ptest_any_brkb_z(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
-; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret i1 [[OUT]]
-;
- %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
- %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
- ret i1 %out
-}
-
-define i1 @ptest_any_rdffr_z(<vscale x 16 x i1> %pg) {
-; CHECK-LABEL: @ptest_any_rdffr_z(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[PG:%.*]])
-; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret i1 [[OUT]]
-;
- %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
- %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
- ret i1 %out
-}
-
define i1 @ptest_first(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: @ptest_first(
; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
ret i1 %out
}
+; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X).
+
+; X = brka.z(%pg, %a): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_brka_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @ptest_any_brka_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %out
+}
+
+; X = brkpa.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_brkpa_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_brkpa_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %out
+}
+
+; X = brkb.z(%pg, %a): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_brkb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
+; CHECK-LABEL: @ptest_any_brkb_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]])
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %out
+}
+
+; X = brkpb.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_brkpb_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_brkpb_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %out
+}
+
+; X = rdffr.z(%pg): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_rdffr_z(<vscale x 16 x i1> %pg) {
+; CHECK-LABEL: @ptest_any_rdffr_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[PG:%.*]])
+; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[OUT]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg)
+ %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %out
+}
+
+; X = and.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_and_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_and_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = bic.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_bic_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_bic_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = eor.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_eor_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_eor_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = nand.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_nand_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_nand_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = nor.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_nor_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_nor_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = orn.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_orn_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_orn_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
+; X = orr.z(%pg, %a, %b): ptest.any(X, X) is expected to become ptest.any(%pg, X).
+define i1 @ptest_any_orr_z(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: @ptest_any_orr_z(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[A:%.*]], <vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG]], <vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
+ %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %1)
+ ret i1 %2
+}
+
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+
attributes #0 = { "target-features"="+sve" }