From 80b897e21bf0ac56b04d415cf9bf671f81a84416 Mon Sep 17 00:00:00 2001 From: =?utf8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 4 May 2021 13:15:13 +0200 Subject: [PATCH] [InstCombine] ctpop(X) ^ ctpop(Y) & 1 --> ctpop(X^Y) & 1 (PR50094) Original pattern: (__builtin_parity(x) ^ __builtin_parity(y)) LLVM rewrites it as: (__builtin_popcount(x) ^ __builtin_popcount(y)) & 1 Optimized form: __builtin_popcount(X^Y) & 1 Alive proof: https://alive2.llvm.org/ce/z/-GdWFr Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D101802 --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 11 +++++++++++ llvm/test/Transforms/InstCombine/ctpop.ll | 21 +++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 6175c92..2b9c1f2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -220,6 +220,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) return I; + Value *LHS, *RHS; + if (DemandedMask == 1 && + match(I->getOperand(0), m_Intrinsic(m_Value(LHS))) && + match(I->getOperand(1), m_Intrinsic(m_Value(RHS)))) { + // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(I); + auto *Xor = Builder.CreateXor(LHS, RHS); + return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor); + } + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 9666eb5..54d8ed9 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -386,10 +386,9 @@ define i32 @zext_ctpop_extra_use(i16 %x, i32* %q) { define i32 @parity_xor(i32 %arg, i32 %arg1) { ; CHECK-LABEL: @parity_xor( -; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]] -; CHECK-NEXT: [[I2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]] -; CHECK-NEXT: [[I3:%.*]] = xor i32 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]] +; CHECK-NEXT: [[I4:%.*]] = and i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[I4]] ; %i = tail call i32 @llvm.ctpop.i32(i32 %arg) @@ -401,10 +400,9 @@ define i32 @parity_xor(i32 %arg, i32 %arg1) { define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) { ; CHECK-LABEL: @parity_xor_trunc( -; CHECK-NEXT: [[I:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG:%.*]]), !range [[RNG5:![0-9]+]] -; CHECK-NEXT: [[I2:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG1:%.*]]), !range [[RNG5]] -; CHECK-NEXT: [[I3:%.*]] = xor i64 [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[I3]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]] +; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[TMP2]] to i32 ; CHECK-NEXT: [[I5:%.*]] = and i32 [[I4]], 1 ; CHECK-NEXT: ret i32 [[I5]] ; @@ -418,10 +416,9 @@ define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) { define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) { ; CHECK-LABEL: @parity_xor_vec( -; CHECK-NEXT: [[I:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG:%.*]]) -; CHECK-NEXT: [[I2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG1:%.*]]) -; CHECK-NEXT: [[I3:%.*]] = xor <2 x i32> [[I2]], [[I]] -; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[I3]], +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], ; CHECK-NEXT: ret <2 x i32> [[I4]] ; %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg) -- 2.7.4