From a942a944245374fc62a5af8ee3abbc579f5ee7a5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 20 Feb 2023 16:38:21 -0800 Subject: [PATCH] [X86] Improve (select carry C1+1 C1) Without this patch: return X < 4 ? 3 : 2; return X < 9 ? 7 : 6; are compiled as: 31 c0 xor %eax,%eax 83 ff 04 cmp $0x4,%edi 0f 93 c0 setae %al 83 f0 03 xor $0x3,%eax 31 c0 xor %eax,%eax 83 ff 09 cmp $0x9,%edi 0f 92 c0 setb %al 83 c8 06 or $0x6,%eax respectively. With this patch, we generate: 31 c0 xor %eax,%eax 83 ff 04 cmp $0x4,%edi 83 d0 02 adc $0x2,%eax 31 c0 xor %eax,%eax 83 ff 09 cmp $0x9,%edi 83 d0 06 adc $0x6,%eax respectively, saving 3 bytes while reducing the tree height. This patch recognizes the equivalence of OR and ADD (if bits do not overlap) and delegates to combineAddOrSubToADCOrSBB for further processing. The same applies to the equivalence of XOR and SUB. Differential Revision: https://reviews.llvm.org/D143838 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/select_const.ll | 25 ++++++++++-------------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 03736ac..8d8f186 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50108,6 +50108,34 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1, + SelectionDAG &DAG) { + assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::OR) && + "Unexpected opcode"); + + // Delegate to combineAddOrSubToADCOrSBB if we have: + // + // (xor/or (zero_extend (setcc)) imm) + // + // where imm is odd if and only if we have xor, in which case the XOR/OR are + // equivalent to a SUB/ADD, respectively.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == X86ISD::SETCC && N0.hasOneUse()) { + if (auto *N1C = dyn_cast<ConstantSDNode>(N1)) { + bool IsSub = N->getOpcode() == ISD::XOR; + bool N1COdd = N1C->getZExtValue() & 1; + if (IsSub ? N1COdd : !N1COdd) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG)) + return R; + } + } + } + + return SDValue(); +} + static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -50255,6 +50283,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, if (SDValue R = foldMaskedMerge(N, DAG)) return R; + if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG)) + return R; + return SDValue(); } @@ -52730,6 +52761,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, if (SDValue SetCC = foldXor1SetCC(N, DAG)) return SetCC; + if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG)) + return R; + if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 431560b..d35cd65 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -525,9 +525,8 @@ define i32 @select_eq0_3_2(i32 %X) { ; CHECK-LABEL: select_eq0_3_2: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: sete %al -; CHECK-NEXT: orl $2, %eax +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: adcl $2, %eax ; CHECK-NEXT: retq %cmp = icmp eq i32 %X, 0 %sel = select i1 %cmp, i32 3, i32 2 @@ -539,8 +538,7 @@ define i32 @select_ugt3_2_3(i32 %X) { ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $4, %edi -; CHECK-NEXT: setae %al -; CHECK-NEXT: xorl $3, %eax +; CHECK-NEXT: adcl $2, %eax ; CHECK-NEXT: retq %cmp = icmp ugt i32 %X, 3 %sel = select i1 %cmp, i32 2, i32 3 @@ -552,8 +550,7 @@ define i32 @select_ult9_7_6(i32 %X) { ; CHECK: # %bb.0: ; 
CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $9, %edi -; CHECK-NEXT: setb %al -; CHECK-NEXT: orl $6, %eax +; CHECK-NEXT: adcl $6, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %X, 9 %sel = select i1 %cmp, i32 7, i32 6 @@ -563,23 +560,21 @@ define i32 @select_ult9_7_6(i32 %X) { define i32 @select_ult2_2_3(i32 %X) { ; CHECK-LABEL: select_ult2_2_3: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $2, %edi -; CHECK-NEXT: setb %al -; CHECK-NEXT: xorl $3, %eax +; CHECK-NEXT: movl $3, %eax +; CHECK-NEXT: sbbl $0, %eax ; CHECK-NEXT: retq %cmp = icmp ult i32 %X, 2 %cond = select i1 %cmp, i32 2, i32 3 ret i32 %cond } -define i32 @select_ugt2_3_2(i32 %X) { -; CHECK-LABEL: select_ugt2_3_2: +define i32 @select_ugt3_3_2(i32 %X) { +; CHECK-LABEL: select_ugt3_3_2: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $4, %edi -; CHECK-NEXT: setae %al -; CHECK-NEXT: orl $2, %eax +; CHECK-NEXT: movl $2, %eax +; CHECK-NEXT: sbbl $-1, %eax ; CHECK-NEXT: retq %cmp.inv = icmp ugt i32 %X, 3 %cond = select i1 %cmp.inv, i32 3, i32 2 -- 2.7.4