From: Sander de Smalen
Date: Tue, 31 Jan 2023 22:20:46 +0000 (+0000)
Subject: [InstCombine] Promote expression tree with @llvm.vscale when zero-extending result.
X-Git-Tag: upstream/17.0.6~18829
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=da4a5a46b3c86df436d37f81c0b7849f93d7fbde;p=platform%2Fupstream%2Fllvm.git

[InstCombine] Promote expression tree with @llvm.vscale when zero-extending result.

The LoopVectorizer emits the (scaled) element count as i32, which for
scalable VFs results in calls to @llvm.vscale.i32(). This value is
scaled and further zero-extended to i64. The zero-extend can be folded
away by executing the whole expression in the i64 type using
@llvm.vscale.i64(). Any logical `and` that would be needed to mask the
result can be further folded away by KnownBits analysis when
vscale_range is set.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D143016
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 70ec15f..118f717 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -252,6 +252,20 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
     Res = CastInst::Create(
         static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
     break;
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default:
+        llvm_unreachable("Unsupported call!");
+      case Intrinsic::vscale: {
+        Function *Fn =
+            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
+        Res = CallInst::Create(Fn->getFunctionType(), Fn);
+        break;
+      }
+      }
+    }
+    break;
   default:
     // TODO: Can handle more cases here.
     llvm_unreachable("Unreachable!");
@@ -1213,6 +1227,13 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
       return false;
     return true;
   }
+  case Instruction::Call:
+    // llvm.vscale() can always be executed in a larger type, because the
+    // value is automatically zero-extended.
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      if (II->getIntrinsicID() == Intrinsic::vscale)
+        return true;
+    return false;
   default:
     // TODO: Can handle more cases here.
     return false;
diff --git a/llvm/test/Transforms/InstCombine/vscale.ll b/llvm/test/Transforms/InstCombine/vscale.ll
new file mode 100644
index 0000000..dbb5ca4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='instcombine' -S < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define i64 @promote_vscale_i32_to_i64() {
+; CHECK-LABEL: @promote_vscale_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[VSCALE]], 4294967295
+; CHECK-NEXT:    ret i64 [[EXT]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %ext = zext i32 %vscale to i64
+  ret i64 %ext
+}
+
+define i64 @promote_zext_shl_vscale_i32_to_i64() {
+; CHECK-LABEL: @promote_zext_shl_vscale_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[VSCALE]], 3
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 4294967288
+; CHECK-NEXT:    ret i64 [[EXT]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %shl = shl i32 %vscale, 3
+  %ext = zext i32 %shl to i64
+  ret i64 %ext
+}
+
+; Same test as @promote_zext_shl_vscale_i32_to_i64, but with the
+; vscale_range attribute so that the 'and' is folded away.
+define i64 @free_zext_vscale_shl_i32_to_i64() #0 {
+; CHECK-LABEL: @free_zext_vscale_shl_i32_to_i64(
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw nsw i64 [[VSCALE]], 3
+; CHECK-NEXT:    ret i64 [[SHL]]
+;
+  %vscale = call i32 @llvm.vscale.i32()
+  %shl = shl i32 %vscale, 3
+  %ext = zext i32 %shl to i64
+  ret i64 %ext
+}
+
+declare i32 @llvm.vscale.i32()
+
+attributes #0 = { vscale_range(1,16) }
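
As an illustration of the combined fold (not part of the commit), below is a
minimal before/after sketch in LLVM IR; the function name @example is
hypothetical and the "after" form is shown schematically rather than as exact
instcombine output. The reasoning for the vscale_range case: with
vscale_range(1,16), vscale is at most 16, so vscale << 3 is at most 128;
every bit that the 4294967288 (0xFFFFFFF8) mask would clear is therefore
already known to be zero (the low three bits by the shl, the high bits by the
range), so no 'and' remains. The result can be reproduced by feeding the
first snippet through 'opt -passes=instcombine -S', as in the RUN line above.

  ; Before: the pattern as the LoopVectorizer would emit it
  ; (@example is a hypothetical function name).
  declare i32 @llvm.vscale.i32()

  define i64 @example() vscale_range(1,16) {
    %vscale = call i32 @llvm.vscale.i32()  ; runtime scale factor, as i32
    %shl = shl i32 %vscale, 3              ; scaled element count
    %ext = zext i32 %shl to i64            ; widened for i64 arithmetic
    ret i64 %ext
  }

  ; After instcombine (schematic): the whole expression is evaluated in
  ; i64 via @llvm.vscale.i64() and the masking 'and' is folded away.
  declare i64 @llvm.vscale.i64()

  define i64 @example() vscale_range(1,16) {
    %vscale = call i64 @llvm.vscale.i64()
    %shl = shl nuw nsw i64 %vscale, 3
    ret i64 %shl
  }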