From 1a2665902f128155fa1febafea990ebaee9476f2 Mon Sep 17 00:00:00 2001 From: Rosie Sumpter Date: Mon, 9 May 2022 09:35:13 +0100 Subject: [PATCH] [AArch64][SVE] Improve codegen when extracting first lane of active lane mask When extracting the first lane of a predicate created using the llvm.get.active.lane.mask intrinsic, it should give the same codegen as when the predicate is created using the llvm.aarch64.sve.whilelo intrinsic, since get.active.lane.mask is lowered to whilelo. This patch ensures the codegen is the same by recognizing llvm.get.active.lane.mask as a flag-setting operation in this case. Differential Revision: https://reviews.llvm.org/D125215 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +++- llvm/test/CodeGen/AArch64/sve-cmp-folds.ll | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dc99ed0..3187430 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14666,7 +14666,9 @@ static bool isPredicateCCSettingOp(SDValue N) { N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels || - N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt))) + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt || + // get_active_lane_mask is lowered to a whilelo instruction. 
+ N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask))) return true; return false; diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll index 380501c..6812f0b 100644 --- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll +++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll @@ -170,6 +170,17 @@ define i1 @whilelt_first(i64 %next, i64 %end) { ret i1 %bit } +define i1 @lane_mask_first(i64 %next, i64 %end) { +; CHECK-LABEL: lane_mask_first: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo p0.s, x0, x1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret + %predicate = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %next, i64 %end) + %bit = extractelement <vscale x 4 x i1> %predicate, i64 0 + ret i1 %bit +} + declare i64 @llvm.vscale.i64() declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64) @@ -179,3 +190,4 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64) +declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64) -- 2.7.4