  return IC.replaceInstUsesWith(II, NPN);
}
+// (from_svbool (binop (to_svbool pred) (svbool_t _) (svbool_t _)))
+// => (binop pred (from_svbool _) (from_svbool _))
+//
+// The above transformation eliminates a `to_svbool` in the predicate
+// operand of a bitwise operation `binop` by narrowing the vector width
+// of the operation. For example, it would convert a `<vscale x 16 x i1>
+// and` into a `<vscale x 4 x i1> and`. This is profitable because
+// to_svbool must zero the lanes introduced by widening, whereas
+// from_svbool is free: it simply reinterprets the predicate register.
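+//
+// As an illustrative sketch (shorthand, not actual IR syntax):
+//
+//   %w = to_svbool(%pred)            ; widen, zeroing the new lanes
+//   %r = binop(%w, %b, %c)           ; <vscale x 16 x i1> operation
+//   %n = from_svbool(%r)             ; narrow the result back
+//
+// becomes:
+//
+//   %b.n = from_svbool(%b)           ; free
+//   %c.n = from_svbool(%c)           ; free
+//   %n   = binop(%pred, %b.n, %c.n)  ; narrow operation, no to_svbool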
+static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
+  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
+  if (!BinOp)
+    return None;
+
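+  // Only these bitwise binop intrinsics, which operate entirely on
+  // predicate (i1) vectors, are handled.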
+  auto IntrinsicID = BinOp->getIntrinsicID();
+  switch (IntrinsicID) {
+  case Intrinsic::aarch64_sve_and_z:
+  case Intrinsic::aarch64_sve_bic_z:
+  case Intrinsic::aarch64_sve_eor_z:
+  case Intrinsic::aarch64_sve_nand_z:
+  case Intrinsic::aarch64_sve_nor_z:
+  case Intrinsic::aarch64_sve_orn_z:
+  case Intrinsic::aarch64_sve_orr_z:
+    break;
+  default:
+    return None;
+  }
+
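+  // All of the _z binop intrinsics above share the same signature:
+  // operand 0 is the governing predicate, operands 1 and 2 are data.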
+  auto BinOpPred = BinOp->getOperand(0);
+  auto BinOpOp1 = BinOp->getOperand(1);
+  auto BinOpOp2 = BinOp->getOperand(2);
+
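+  // The combine only fires when the binop's predicate comes straight from
+  // a to_svbool; eliminating that widening is the point of the rewrite.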
+  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
+  if (!PredIntr ||
+      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
+    return None;
+
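+  // Bail out unless the predicate's pre-widening type matches the result
+  // type of this from_svbool, so the narrowed binop produces exactly the
+  // type that II's users expect.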
+  auto PredOp = PredIntr->getOperand(0);
+  auto PredOpTy = cast<VectorType>(PredOp->getType());
+  if (PredOpTy != II.getType())
+    return None;
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
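+  // Rebuild the binop at the narrow width: keep the original, unwidened
+  // predicate and narrow each data operand with a free from_svbool.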
+  SmallVector<Value *> NarrowedBinOpArgs = {PredOp};
+  auto NarrowBinOpOp1 = Builder.CreateIntrinsic(
+      Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
+  NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
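+  // When both data operands are the same value, reuse the conversion
+  // just created instead of emitting a redundant one.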
+  if (BinOpOp1 == BinOpOp2)
+    NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
+  else
+    NarrowedBinOpArgs.push_back(Builder.CreateIntrinsic(
+        Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
+
+  auto NarrowedBinOp =
+      Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
+  return IC.replaceInstUsesWith(II, NarrowedBinOp);
+}
+
static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
                                                            IntrinsicInst &II) {
  // If the reinterpret instruction operand is a PHI Node
  if (isa<PHINode>(II.getArgOperand(0)))
    return processPhiNode(IC, II);
+  if (auto BinOpCombine = tryCombineFromSVBoolBinOp(IC, II))
+    return BinOpCombine;
+
  SmallVector<Instruction *, 32> CandidatesForRemoval;
  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
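+; The (from_svbool (binop (to_svbool pred) _ _)) chains below should be
+; rewritten so that the binop runs at the predicate's original, narrower
+; width and the to_svbool disappears.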
+define <vscale x 4 x i1> @try_combine_svbool_binop_and_0(<vscale x 4 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_and_0(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[C:%.*]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.and.z.nxv4i1(<vscale x 4 x i1> [[A:%.*]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> [[TMP2]])
+; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP3]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c)
+ %t3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 4 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_and_1(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_and_1(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.and.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 4 x i1> @try_combine_svbool_binop_and_2(<vscale x 4 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_and_2(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.and.z.nxv4i1(<vscale x 4 x i1> [[A:%.*]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 4 x i1> %t3
+}
+
+define <vscale x 2 x i1> @try_combine_svbool_binop_and_3(<vscale x 2 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_and_3(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.and.z.nxv2i1(<vscale x 2 x i1> [[A:%.*]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 2 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_bic(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_bic(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.bic.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_eor(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_eor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.eor.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_nand(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_nand(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.nand.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_nor(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_nor(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.nor.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_orn(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_orn(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.orn.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+define <vscale x 8 x i1> @try_combine_svbool_binop_orr(<vscale x 8 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: @try_combine_svbool_binop_orr(
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[B:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.orr.z.nxv8i1(<vscale x 8 x i1> [[A:%.*]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP2]]
+;
+ %t1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+ %t2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> %t1, <vscale x 16 x i1> %b, <vscale x 16 x i1> %b)
+ %t3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %t2)
+ ret <vscale x 8 x i1> %t3
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
+
+attributes #0 = { "target-features"="+sve" }