From a6f1134b095a6ac3694b0dc40c153dc3ce35e113 Mon Sep 17 00:00:00 2001 From: Dinar Temirbulatov Date: Wed, 11 Jan 2023 14:06:01 +0000 Subject: [PATCH] [AArch64][SVE] Avoid AND operation if both sides are splat of i1 or PTRUE If both sides of AND operations are i1 splat_vectors or PTRUE node then we can produce just i1 splat_vector as the result. Differential Revision: https://reviews.llvm.org/D141043 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 20 ++++++ .../CodeGen/AArch64/sve-intrinsics-reinterpret.ll | 4 +- .../CodeGen/AArch64/sve-splat-one-and-ptrue.ll | 76 ++++++++++++++++++++++ 3 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 179eedc..d90b810 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16319,6 +16319,17 @@ static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) { return false; } +static SDValue performReinterpretCastCombine(SDNode *N) { + SDValue LeafOp = SDValue(N, 0); + SDValue Op = N->getOperand(0); + while (Op.getOpcode() == AArch64ISD::REINTERPRET_CAST && + LeafOp.getValueType() != Op.getValueType()) + Op = Op->getOperand(0); + if (LeafOp.getValueType() == Op.getValueType()) + return Op; + return SDValue(); +} + static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -16365,6 +16376,13 @@ static SDValue performSVEAndCombine(SDNode *N, return DAG.getNode(Opc, DL, N->getValueType(0), And); } + // If both sides of AND operations are i1 splat_vectors then + // we can produce just i1 splat_vector as the result. 
+ if (isAllActivePredicate(DAG, N->getOperand(0))) + return N->getOperand(1); + if (isAllActivePredicate(DAG, N->getOperand(1))) + return N->getOperand(0); + if (!EnableCombineMGatherIntrinsics) return SDValue(); @@ -21400,6 +21418,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performUzpCombine(N, DAG); case AArch64ISD::SETCC_MERGE_ZERO: return performSetccMergeZeroCombine(N, DCI); + case AArch64ISD::REINTERPRET_CAST: + return performReinterpretCastCombine(N); case AArch64ISD::GLD1_MERGE_ZERO: case AArch64ISD::GLD1_SCALED_MERGE_ZERO: case AArch64ISD::GLD1_UXTW_MERGE_ZERO: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll index 0a24f07..4c1dfcc 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll @@ -125,9 +125,7 @@ define @reinterpret_cmpgt( %p, @chained_reinterpret() { ; CHECK-LABEL: chained_reinterpret: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ret %in = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %cast2 = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %in) diff --git a/llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll new file mode 100644 index 0000000..a517eff --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-splat-one-and-ptrue.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s +
+target triple = "aarch64-unknown-linux-gnu"
+
+; Ensure that a no-op 'and' gets removed with vector splat of 1 or ptrue with proper constant
+
+define @fold_away_ptrue_and_ptrue() #0 { +; CHECK-LABEL: fold_away_ptrue_and_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ret +entry: + %0 = call 
@llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + %2 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %and = and %2, %1 + ret %and +} + +define @fold_away_ptrue_and_splat_predicate() #0 { +; CHECK-LABEL: fold_away_ptrue_and_splat_predicate: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ret +entry: + %ins = insertelement undef, i1 1, i32 0 + %splat = shufflevector %ins, undef, zeroinitializer + %0 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %splat) + %1 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %and = and %0, %1 + ret %and +} + +; Ensure that one AND operation remains for inactive lanes zeroing with 2 x i1 type (llvm.aarch64.sve.convert.to.svbool.nxv2i1). +define @fold_away_ptrue_and_convert_to() #0 { +; CHECK-LABEL: fold_away_ptrue_and_convert_to: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b +; CHECK-NEXT: ret +entry: + %0 = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %0) + %2 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %3 = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %2) + %4 = call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %3) + %and = and %4, %1 + ret %and +} + +define @fold_away_two_similar() #0 { +; CHECK-LABEL: fold_away_two_similar: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ret +entry: + %0 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %1 = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %and = and %0, %1 + ret %and +} + +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg) + +declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg) + +declare @llvm.aarch64.sve.convert.from.svbool.nxv2i1() + +declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() + + +attributes #0 = { "target-features"="+sve" } -- 2.7.4