From 44e2c6a428cca8a50c620c589e77f46f768eb0d7 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov@arm.com>
Date: Fri, 18 Nov 2022 16:21:10 +0000
Subject: [PATCH] [AArch64][SVE] Use PTRUE instruction instead of WHILELO if
 the range is appropriate for predicate constant.

While lowering get_active_lane_mask, a WHILELO instruction is used; but for
a constant range suitable for PTRUE we can issue a PTRUE instruction
instead.

Differential Revision: https://reviews.llvm.org/D137547
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 16 ++++++++
 llvm/test/CodeGen/AArch64/active_lane_mask.ll   | 52 +++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8b3487d..bdaf22a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4749,6 +4749,22 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                        Op.getOperand(1))));
     return SDValue();
   }
+  case Intrinsic::aarch64_sve_whilelo: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      unsigned NumActiveElems =
+          Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems);
+      if ((PredPattern != None) &&
+          NumActiveElems <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
   case Intrinsic::aarch64_sve_sunpkhi:
     return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
                        Op.getOperand(1));
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 1ddd24e..211361d 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -475,6 +475,58 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
   ret <2 x i1> %active.lane.mask
 }
 
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm3() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s, vl3
+; CHECK-NEXT:    ret
+entry:
+  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 3)
+  ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #5
+; CHECK-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
+  ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 4 x i1> @lane_mask_nxv4i1_imm4() {
+; CHECK-LABEL: lane_mask_nxv4i1_imm4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    ret
+entry:
+  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 10, i64 14)
+  ret <vscale x 4 x i1> %active.lane.mask
+}
+
+define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
+; CHECK-LABEL: lane_mask_nxv16i1_imm10:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    whilelo p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 10)
+  ret <vscale x 16 x i1> %active.lane.mask
+}
+
+define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16, 16) {
+; CHECK-LABEL: lane_mask_nxv16i1_imm256:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+entry:
+  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
+  ret <vscale x 16 x i1> %active.lane.mask
+}
+
 declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
-- 
2.7.4