From: Sanjay Patel Date: Mon, 21 Jun 2021 14:51:30 +0000 (-0400) Subject: [InstCombine] fold ctlz/cttz-of-select with 1 or more constant arms X-Git-Tag: llvmorg-14-init~3465 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=64b2676ca8290c7ff971c6f5b7a355b0ae7693de;p=platform%2Fupstream%2Fllvm.git [InstCombine] fold ctlz/cttz-of-select with 1 or more constant arms Building on: 4c44b02d87 ...and adding handling for the extra operand in these intrinsics. This pattern is discussed in: https://llvm.org/PR50140 --- diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index c198863..556550b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1080,6 +1080,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { case Intrinsic::ctlz: if (auto *I = foldCttzCtlz(*II, *this)) return I; + + // If the operand is a select with constant arm(s), try to hoist ctlz/cttz. + if (auto *Sel = dyn_cast<SelectInst>(II->getArgOperand(0))) + if (Instruction *R = FoldOpIntoSelect(*II, Sel)) + return R; break; case Intrinsic::ctpop: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 257931d..67aca4e 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -941,8 +941,14 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, if (auto *II = dyn_cast<IntrinsicInst>(&I)) { assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) && "Expected constant-foldable intrinsic"); + Intrinsic::ID IID = II->getIntrinsicID(); + SmallVector<Value *> Args = {SO}; - return Builder.CreateIntrinsic(II->getIntrinsicID(), I.getType(), SO); + // Propagate the zero-is-undef argument to the new instruction. 
+ if (IID == Intrinsic::ctlz || IID == Intrinsic::cttz) + Args.push_back(II->getArgOperand(1)); + + return Builder.CreateIntrinsic(IID, I.getType(), Args); } assert(I.isBinaryOp() && "Unexpected opcode for select folding"); diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll index 1ee792d..92c5e3b 100644 --- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll +++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll @@ -14,8 +14,7 @@ declare <3 x i7> @llvm.ctpop.v3i7(<3 x i7>) define i32 @ctlz_sel_const_true_false(i1 %b) { ; CHECK-LABEL: @ctlz_sel_const_true_false( -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 5, i32 -7 -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctlz.i32(i32 [[S]], i1 true), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 29, i32 0 ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 5, i32 -7 @@ -25,8 +24,8 @@ define i32 @ctlz_sel_const_true_false(i1 %b) { define i32 @ctlz_sel_const_true(i1 %b, i32 %x) { ; CHECK-LABEL: @ctlz_sel_const_true( -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 5, i32 [[X:%.*]] -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctlz.i32(i32 [[S]], i1 false), !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 29, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 5, i32 %x @@ -36,8 +35,8 @@ define i32 @ctlz_sel_const_true(i1 %b, i32 %x) { define <3 x i17> @ctlz_sel_const_false(<3 x i1> %b, <3 x i17> %x) { ; CHECK-LABEL: @ctlz_sel_const_false( -; CHECK-NEXT: [[S:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[X:%.*]], <3 x i17> -; CHECK-NEXT: [[C:%.*]] = call <3 x i17> @llvm.ctlz.v3i17(<3 x i17> [[S]], i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i17> @llvm.ctlz.v3i17(<3 x i17> [[X:%.*]], i1 true) +; CHECK-NEXT: [[C:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[TMP1]], <3 x i17> 
; CHECK-NEXT: ret <3 x i17> [[C]] ; %s = select <3 x i1> %b, <3 x i17> %x, <3 x i17> @@ -49,7 +48,7 @@ define i32 @ctlz_sel_const_true_false_extra_use(i1 %b) { ; CHECK-LABEL: @ctlz_sel_const_true_false_extra_use( ; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 -1, i32 7 ; CHECK-NEXT: call void @use(i32 [[S]]) -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctlz.i32(i32 [[S]], i1 true), !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctlz.i32(i32 [[S]], i1 true), !range [[RNG1:![0-9]+]] ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 -1, i32 7 @@ -60,8 +59,7 @@ define i32 @ctlz_sel_const_true_false_extra_use(i1 %b) { define i32 @cttz_sel_const_true_false(i1 %b) { ; CHECK-LABEL: @cttz_sel_const_true_false( -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 4, i32 -7 -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG1]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 2, i32 0 ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 4, i32 -7 @@ -71,8 +69,8 @@ define i32 @cttz_sel_const_true_false(i1 %b) { define i32 @cttz_sel_const_true(i1 %b, i32 %x) { ; CHECK-LABEL: @cttz_sel_const_true( -; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 5, i32 [[X:%.*]] -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 0, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 5, i32 %x @@ -82,8 +80,8 @@ define i32 @cttz_sel_const_true(i1 %b, i32 %x) { define <3 x i5> @cttz_sel_const_false(<3 x i1> %b, <3 x i5> %x) { ; CHECK-LABEL: @cttz_sel_const_false( -; CHECK-NEXT: [[S:%.*]] = select <3 x i1> [[B:%.*]], <3 x i5> [[X:%.*]], <3 x i5> -; CHECK-NEXT: [[C:%.*]] = call <3 x i5> @llvm.cttz.v3i5(<3 x i5> [[S]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i5> @llvm.cttz.v3i5(<3 x i5> [[X:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = 
select <3 x i1> [[B:%.*]], <3 x i5> [[TMP1]], <3 x i5> ; CHECK-NEXT: ret <3 x i5> [[C]] ; %s = select <3 x i1> %b, <3 x i5> %x, <3 x i5> @@ -95,7 +93,7 @@ define i32 @cttz_sel_const_true_false_extra_use(i1 %b) { ; CHECK-LABEL: @cttz_sel_const_true_false_extra_use( ; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 5, i32 -8 ; CHECK-NEXT: call void @use(i32 [[S]]) -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG1]] +; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG0]] ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 5, i32 -8 @@ -116,7 +114,7 @@ define i32 @ctpop_sel_const_true_false(i1 %b) { define i32 @ctpop_sel_const_true(i1 %b, i32 %x) { ; CHECK-LABEL: @ctpop_sel_const_true( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG1]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]] ; CHECK-NEXT: [[C:%.*]] = select i1 [[B:%.*]], i32 2, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -140,7 +138,7 @@ define i32 @ctpop_sel_const_true_false_extra_use(i1 %b) { ; CHECK-LABEL: @ctpop_sel_const_true_false_extra_use( ; CHECK-NEXT: [[S:%.*]] = select i1 [[B:%.*]], i32 5, i32 7 ; CHECK-NEXT: call void @use(i32 [[S]]) -; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctpop.i32(i32 [[S]]), !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = call i32 @llvm.ctpop.i32(i32 [[S]]), !range [[RNG2:![0-9]+]] ; CHECK-NEXT: ret i32 [[C]] ; %s = select i1 %b, i32 5, i32 7