From: Rosie Sumpter Date: Mon, 9 May 2022 08:35:13 +0000 (+0100) Subject: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask X-Git-Tag: upstream/15.0.7~8261 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1a2665902f128155fa1febafea990ebaee9476f2;p=platform%2Fupstream%2Fllvm.git [AArch64][SVE] Improve codegen when extracting first lane of active lane mask When extracting the first lane of a predicate created using the llvm.get.active.lane.mask intrinsic, it should give the same codegen as when the predicate is created using the llvm.aarch64.sve.whilelo intrinsic, since get.active.lane.mask is lowered to whilelo. This patch ensures the codegen is the same by recognizing llvm.get.active.lane.mask as a flag-setting operation in this case. Differential Revision: https://reviews.llvm.org/D125215 --- diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index dc99ed0..3187430 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14666,7 +14666,9 @@ static bool isPredicateCCSettingOp(SDValue N) { N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels || - N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt))) + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt || + // get_active_lane_mask is lowered to a whilelo instruction. 
+ N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask))) return true; return false; diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll index 380501c..6812f0b 100644 --- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll +++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll @@ -170,6 +170,17 @@ define i1 @whilelt_first(i64 %next, i64 %end) { ret i1 %bit } +define i1 @lane_mask_first(i64 %next, i64 %end) { +; CHECK-LABEL: lane_mask_first: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo p0.s, x0, x1 +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret + %predicate = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %next, i64 %end) + %bit = extractelement <vscale x 4 x i1> %predicate, i64 0 + ret i1 %bit +} + declare i64 @llvm.vscale.i64() declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64) @@ -179,3 +190,4 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64) declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64) +declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)