return true;
}
+ // Return true if it is desirable to try and optimize LogicOp(SETCC0, SETCC1).
+ // An example (what is implemented as of writing this) is:
+ // With C as a power of 2 and C != 0 and C != INT_MIN:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq and(add(A, C), ~(C + C)), 0)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ //
+ // @param LogicOp the logic op
+ // @param SETCC0 the first of the SETCC nodes
+ // @param SETCC1 the second of the SETCC nodes
+ virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+ const SDNode *SETCC0,
+ const SDNode *SETCC1) const {
+ return false;
+ }
+
/// Return true if it is profitable to combine an XOR of a logical shift
/// to create a logical shift of NOT. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
return SDValue();
}
+// Try to fold an AND/OR of two SETCC nodes into a single cheaper SETCC.
+// Currently only handles the symmetric-constant equality pattern described
+// below; whether the fold is attempted at all is gated by the target hook
+// isDesirableToCombineLogicOpOfSETCC.
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ assert(
+ (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+ "Invalid Op to combine SETCC with");
+
+ // TODO: Search past casts/truncates.
+ SDValue LHS = LogicOp->getOperand(0);
+ SDValue RHS = LogicOp->getOperand(1);
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, LHS.getNode(),
+ RHS.getNode()))
+ return SDValue();
+
+ // Operand 2 of a SETCC node is its condition code.
+ ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+
+ SDValue LHS0 = LHS->getOperand(0);
+ SDValue RHS0 = RHS->getOperand(0);
+ SDValue LHS1 = LHS->getOperand(1);
+ SDValue RHS1 = RHS->getOperand(1);
+
+ auto *LHS1C = dyn_cast<ConstantSDNode>(LHS1);
+ auto *RHS1C = dyn_cast<ConstantSDNode>(RHS1);
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = LHS0.getValueType();
+ SDLoc DL(LogicOp);
+
+ // With C as a power of 2 and C != 0 and C != INT_MIN:
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq and(add(A, C), ~(C + C)), 0)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne and(add(A, C), ~(C + C)), 0)
+ // Requires both compares to share the same non-constant operand, both
+ // constants to be exact negations of each other, and each SETCC to have no
+ // other users (so the original compares can be dropped).
+ if (CCL == CCR &&
+ CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+ RHS.hasOneUse() && LHS1C->getAPIntValue() == (-RHS1C->getAPIntValue())) {
+ // Either constant may be the positive power of 2; the transform is
+ // symmetric in C and -C.
+ const ConstantSDNode *Pow2 = nullptr;
+ if (LHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = LHS1C;
+ else if (RHS1C->getAPIntValue().isPowerOf2())
+ Pow2 = RHS1C;
+ // isPowerOf2 is only for non-zero powers of 2.
+ // INT_MIN is excluded: there -C == C, so ~(C + C) would wrap to ~0 and
+ // the fold would be wrong.
+ if (Pow2 != nullptr && !Pow2->getAPIntValue().isMinSignedValue()) {
+ const APInt &C = Pow2->getAPIntValue();
+ SDValue AddOp =
+ DAG.getNode(ISD::ADD, DL, OpVT, LHS0, DAG.getConstant(C, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~(C + C), DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
+ }
+
+ return SDValue();
+}
+
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
; X86-LABEL: eq_pow_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $32, %eax
-; X86-NEXT: sete %cl
-; X86-NEXT: cmpl $-32, %eax
+; X86-NEXT: addl $32, %eax
+; X86-NEXT: testl $-65, %eax
; X86-NEXT: sete %al
-; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: eq_pow_or:
; X64: # %bb.0:
-; X64-NEXT: cmpl $32, %edi
-; X64-NEXT: sete %cl
-; X64-NEXT: cmpl $-32, %edi
+; X64-NEXT: addl $32, %edi
+; X64-NEXT: testl $-65, %edi
; X64-NEXT: sete %al
-; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
%2 = icmp eq i32 %0, 32
%3 = icmp eq i32 %0, -32
; X86-LABEL: ne_pow_and:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpb $16, %al
-; X86-NEXT: setne %cl
-; X86-NEXT: cmpb $-16, %al
+; X86-NEXT: addb $16, %al
+; X86-NEXT: testb $-33, %al
; X86-NEXT: setne %al
-; X86-NEXT: andb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: ne_pow_and:
; X64: # %bb.0:
-; X64-NEXT: cmpb $16, %dil
-; X64-NEXT: setne %cl
-; X64-NEXT: cmpb $-16, %dil
+; X64-NEXT: addb $16, %dil
+; X64-NEXT: testb $-33, %dil
; X64-NEXT: setne %al
-; X64-NEXT: andb %cl, %al
; X64-NEXT: retq
%2 = icmp ne i8 %0, 16
%3 = icmp ne i8 %0, -16