From 5c8188c7bc524c3e4ea22762645fcbe43042aee9 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Mon, 12 Jun 2023 13:17:27 -0500 Subject: [PATCH] [DAGCombine] Use `IsKnownNeverZero` to see if we need zero-check in is_pow2 setcc pattern `ctpop(X) eq/ne 1` is checking if X is a non-zero power of 2. Power of 2 check including zero is `(X & (X-1)) eq/ne 0` and unfortunately there is no good pattern for checking a power of 2 while excluding zero. So, when lowering `ctpop(X) eq/ne 1`, explicitly check `IsKnownNeverZero(X)` to maybe be able to optimize out the extra zero check. We need this explicitly as DAGCombiner does not re-analyze provable setcc nodes, and the middle-end never finds it beneficial to broaden `ctpop(X) eq/ne 1` -> `ctpop(X) ule/ugt 1` (power of 2 including zero). Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D152675 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 6 ++- llvm/test/CodeGen/X86/ispow2.ll | 68 ++++++++---------------- 2 files changed, 26 insertions(+), 48 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 32be369..6da772d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4085,8 +4085,12 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so + // check before the emit a potentially unnecessary op. + if (DAG.isKnownNeverZero(CTOp)) + return RHS; + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); unsigned LogicOpcode = Cond == ISD::SETEQ ? 
ISD::AND : ISD::OR; return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); } diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll index 9fb2f9b..45d04c2 100644 --- a/llvm/test/CodeGen/X86/ispow2.ll +++ b/llvm/test/CodeGen/X86/ispow2.ll @@ -12,19 +12,14 @@ define i1 @is_pow2_non_zero(i32 %xin) { ; CHECK-NOBMI-NEXT: orl $256, %edi # imm = 0x100 ; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax ; CHECK-NOBMI-NEXT: testl %eax, %edi -; CHECK-NOBMI-NEXT: sete %cl -; CHECK-NOBMI-NEXT: testl %edi, %edi -; CHECK-NOBMI-NEXT: setne %al -; CHECK-NOBMI-NEXT: andb %cl, %al +; CHECK-NOBMI-NEXT: sete %al ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI2-LABEL: is_pow2_non_zero: ; CHECK-BMI2: # %bb.0: ; CHECK-BMI2-NEXT: orl $256, %edi # imm = 0x100 -; CHECK-BMI2-NEXT: setne %cl ; CHECK-BMI2-NEXT: blsrl %edi, %eax ; CHECK-BMI2-NEXT: sete %al -; CHECK-BMI2-NEXT: andb %cl, %al ; CHECK-BMI2-NEXT: retq %x = or i32 %xin, 256 %cnt = call i32 @llvm.ctpop.i32(i32 %x) @@ -64,19 +59,14 @@ define i1 @neither_pow2_non_zero(i32 %xin) { ; CHECK-NOBMI-NEXT: orl $256, %edi # imm = 0x100 ; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax ; CHECK-NOBMI-NEXT: testl %eax, %edi -; CHECK-NOBMI-NEXT: setne %cl -; CHECK-NOBMI-NEXT: testl %edi, %edi -; CHECK-NOBMI-NEXT: sete %al -; CHECK-NOBMI-NEXT: orb %cl, %al +; CHECK-NOBMI-NEXT: setne %al ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI2-LABEL: neither_pow2_non_zero: ; CHECK-BMI2: # %bb.0: ; CHECK-BMI2-NEXT: orl $256, %edi # imm = 0x100 -; CHECK-BMI2-NEXT: sete %cl ; CHECK-BMI2-NEXT: blsrl %edi, %eax ; CHECK-BMI2-NEXT: setne %al -; CHECK-BMI2-NEXT: orb %cl, %al ; CHECK-BMI2-NEXT: retq %x = or i32 %xin, 256 %cnt = call i32 @llvm.ctpop.i32(i32 %x) @@ -94,24 +84,16 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) { ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3 ; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3 -; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm4 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm3 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm5 = 
xmm3[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm3, %xmm5 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3 -; CHECK-NOBMI-NEXT: pandn %xmm5, %xmm3 +; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1 +; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm3 +; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2] +; CHECK-NOBMI-NEXT: pand %xmm3, %xmm4 ; CHECK-NOBMI-NEXT: paddq %xmm0, %xmm2 -; CHECK-NOBMI-NEXT: pand %xmm0, %xmm2 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm2 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm2, %xmm1 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm0 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2] ; CHECK-NOBMI-NEXT: pand %xmm2, %xmm0 -; CHECK-NOBMI-NEXT: pandn %xmm1, %xmm0 -; CHECK-NOBMI-NEXT: packssdw %xmm3, %xmm0 +; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; CHECK-NOBMI-NEXT: pand %xmm1, %xmm0 +; CHECK-NOBMI-NEXT: packssdw %xmm4, %xmm0 ; CHECK-NOBMI-NEXT: retq ; ; CHECK-AVX2-LABEL: is_pow2_non_zero_4xv64: @@ -153,27 +135,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) { ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3 ; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3 -; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm4 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm3 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm3, %xmm5 -; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm5 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3 -; CHECK-NOBMI-NEXT: por %xmm5, %xmm3 -; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm1 -; CHECK-NOBMI-NEXT: pand %xmm0, %xmm1 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,0,3,2] -; CHECK-NOBMI-NEXT: pand %xmm1, %xmm5 -; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm5 -; 
CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm0 +; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1 +; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm3 +; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,0,3,2] +; CHECK-NOBMI-NEXT: pand %xmm3, %xmm4 +; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4 +; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm3 +; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3 +; CHECK-NOBMI-NEXT: pand %xmm3, %xmm0 +; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0 ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm0 -; CHECK-NOBMI-NEXT: por %xmm5, %xmm0 -; CHECK-NOBMI-NEXT: packssdw %xmm3, %xmm0 +; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm0 +; CHECK-NOBMI-NEXT: packssdw %xmm4, %xmm0 ; CHECK-NOBMI-NEXT: retq ; ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64: -- 2.7.4