SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
+ SDValue unfoldMaskedMerge(SDNode *N); // ((x ^ y) & m) ^ y -> (x & m) | (y & ~m)
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans);
SDValue rebuildSetCC(SDValue N);
return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+//   |     A     |  |B|
+//   ((x ^ y) & m) ^ y
+//    |  D  |
+// Into: (x & m) | (y & ~m)
+// Returns an empty SDValue when N does not match or is left as-is.
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+ assert(N->getOpcode() == ISD::XOR);
+
+ EVT VT = N->getValueType(0);
+
+ // FIXME: vector types are not handled yet; bail out on them.
+ if (VT.isVector())
+ return SDValue();
+
+ // There are 3 commutable operators in the pattern, so we have to deal with 8
+ // possible variants of it. On a match, matchAndXor fills in X, Y and M.
+ SDValue X, Y, M;
+ auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+ if (And.getOpcode() != ISD::AND || !And.hasOneUse()) // need a one-use AND
+ return false;
+ if (And.getOperand(XorIdx).getOpcode() != ISD::XOR ||
+ !And.getOperand(XorIdx).hasOneUse()) // ...whose XorIdx operand is a one-use XOR
+ return false;
+ SDValue Xor0 = And.getOperand(XorIdx).getOperand(0);
+ SDValue Xor1 = And.getOperand(XorIdx).getOperand(1);
+ if (Other == Xor0) // normalize: the operand equal to Other goes into Xor1
+ std::swap(Xor0, Xor1);
+ if (Other != Xor1) // the inner XOR must reuse the outer XOR's other operand
+ return false;
+ X = Xor0;
+ Y = Xor1;
+ M = And.getOperand(XorIdx ? 0 : 1); // the mask is the AND operand that is not the XOR
+ return true;
+ };
+
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ if (!matchAndXor(A, 0, B) && !matchAndXor(A, 1, B) && !matchAndXor(B, 0, A) &&
+ !matchAndXor(B, 1, A)) // tried both operands of N as the AND, and both AND operands as the XOR
+ return SDValue();
+
+ // Don't do anything if the mask is constant. This should not be reachable.
+ // InstCombine should have already unfolded this pattern, and DAGCombiner
+ // probably shouldn't produce it either.
+ if (isa<ConstantSDNode>(M.getNode()))
+ return SDValue();
+
+ // We can only transform if the target reports (via hasAndNot) an and-not for M.
+ if (!TLI.hasAndNot(M))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); // x & m
+ SDValue NotM = DAG.getNOT(DL, M, VT); // ~m
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); // y & ~m
+
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); // (x & m) | (y & ~m)
+}
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
+ if (SDValue MM = unfoldMaskedMerge(N))
+ return MM;
+
// Simplify the expression using non-local knowledge.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
define i8 @in8(i8 %x, i8 %y, i8 %mask) {
; CHECK-LABEL: in8:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: and w8, w0, w2
+; CHECK-NEXT: bic w9, w1, w2
+; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%n0 = xor i8 %x, %y
%n1 = and i8 %n0, %mask
define i16 @in16(i16 %x, i16 %y, i16 %mask) {
; CHECK-LABEL: in16:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: and w8, w0, w2
+; CHECK-NEXT: bic w9, w1, w2
+; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%n0 = xor i16 %x, %y
%n1 = and i16 %n0, %mask
define i32 @in32(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in32:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: bic w8, w1, w2
+; CHECK-NEXT: and w9, w0, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
define i64 @in64(i64 %x, i64 %y, i64 %mask) {
; CHECK-LABEL: in64:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor x8, x0, x1
-; CHECK-NEXT: and x8, x8, x2
-; CHECK-NEXT: eor x0, x8, x1
+; CHECK-NEXT: bic x8, x1, x2
+; CHECK-NEXT: and x9, x0, x2
+; CHECK-NEXT: orr x0, x9, x8
; CHECK-NEXT: ret
%n0 = xor i64 %x, %y
%n1 = and i64 %n0, %mask
define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_0_0_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w2, w8
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: bic w8, w1, w2
+; CHECK-NEXT: and w9, w0, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_0_1_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w1, w8
+; CHECK-NEXT: bic w8, w1, w2
+; CHECK-NEXT: and w9, w0, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_0_1_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w2, w8
-; CHECK-NEXT: eor w0, w1, w8
+; CHECK-NEXT: bic w8, w1, w2
+; CHECK-NEXT: and w9, w0, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_1_0_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w8, w0
+; CHECK-NEXT: bic w8, w0, w2
+; CHECK-NEXT: and w9, w1, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_1_0_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w2, w8
-; CHECK-NEXT: eor w0, w8, w0
+; CHECK-NEXT: bic w8, w0, w2
+; CHECK-NEXT: and w9, w1, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_1_1_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w8, w2
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: bic w8, w0, w2
+; CHECK-NEXT: and w9, w1, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: in_commutativity_1_1_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: eor w8, w0, w1
-; CHECK-NEXT: and w8, w2, w8
-; CHECK-NEXT: eor w0, w0, w8
+; CHECK-NEXT: bic w8, w0, w2
+; CHECK-NEXT: and w9, w1, w2
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
; CHECK-LABEL: in_complex_y0:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
-; CHECK-NEXT: eor w9, w0, w8
-; CHECK-NEXT: and w9, w9, w3
-; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: and w9, w0, w3
+; CHECK-NEXT: bic w8, w8, w3
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%n0 = xor i32 %x, %y
; CHECK-LABEL: in_complex_y1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
-; CHECK-NEXT: eor w9, w0, w8
-; CHECK-NEXT: and w9, w9, w3
-; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: and w9, w0, w3
+; CHECK-NEXT: bic w8, w8, w3
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%n0 = xor i32 %x, %y
; CHECK-LABEL: in_complex_m0:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w2, w3
-; CHECK-NEXT: eor w9, w0, w1
-; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: bic w9, w1, w8
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%mask = xor i32 %m_a, %m_b
%n0 = xor i32 %x, %y
; CHECK-LABEL: in_complex_m1:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w2, w3
-; CHECK-NEXT: eor w9, w0, w1
-; CHECK-NEXT: and w8, w8, w9
-; CHECK-NEXT: eor w0, w8, w1
+; CHECK-NEXT: bic w9, w1, w8
+; CHECK-NEXT: and w8, w0, w8
+; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
%mask = xor i32 %m_a, %m_b
%n0 = xor i32 %x, %y
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
; CHECK-NEXT: eor w9, w3, w4
-; CHECK-NEXT: eor w10, w0, w8
-; CHECK-NEXT: and w9, w10, w9
-; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: bic w8, w8, w9
+; CHECK-NEXT: and w9, w0, w9
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
; CHECK-NEXT: eor w9, w3, w4
-; CHECK-NEXT: eor w10, w0, w8
-; CHECK-NEXT: and w9, w10, w9
-; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: bic w8, w8, w9
+; CHECK-NEXT: and w9, w0, w9
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
; CHECK-NEXT: eor w9, w3, w4
-; CHECK-NEXT: eor w10, w0, w8
-; CHECK-NEXT: and w9, w9, w10
-; CHECK-NEXT: eor w0, w9, w8
+; CHECK-NEXT: bic w8, w8, w9
+; CHECK-NEXT: and w9, w0, w9
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w1, w2
; CHECK-NEXT: eor w9, w3, w4
-; CHECK-NEXT: eor w10, w0, w8
-; CHECK-NEXT: and w9, w9, w10
-; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: bic w8, w8, w9
+; CHECK-NEXT: and w9, w0, w9
+; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
;
; CHECK-BMI-LABEL: in8:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
+; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
; CHECK-BMI-NEXT: retq
%n0 = xor i8 %x, %y
%n1 = and i8 %n0, %mask
;
; CHECK-BMI-LABEL: in16:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
+; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-BMI-NEXT: retq
%n0 = xor i16 %x, %y
%n1 = and i16 %n0, %mask
;
; CHECK-BMI-LABEL: in32:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
;
; CHECK-BMI-LABEL: in64:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorq %rsi, %rdi
+; CHECK-BMI-NEXT: andnq %rsi, %rdx, %rax
; CHECK-BMI-NEXT: andq %rdx, %rdi
-; CHECK-BMI-NEXT: xorq %rsi, %rdi
-; CHECK-BMI-NEXT: movq %rdi, %rax
+; CHECK-BMI-NEXT: orq %rdi, %rax
; CHECK-BMI-NEXT: retq
%n0 = xor i64 %x, %y
%n1 = and i64 %n0, %mask
;
; CHECK-BMI-LABEL: in_commutativity_0_0_1:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
;
; CHECK-BMI-LABEL: in_commutativity_0_1_0:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
;
; CHECK-BMI-LABEL: in_commutativity_0_1_1:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
;
; CHECK-BMI-LABEL: in_commutativity_1_0_0:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %edi, %esi
-; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
;
; CHECK-BMI-LABEL: in_commutativity_1_0_1:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %edi, %esi
-; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
;
; CHECK-BMI-LABEL: in_commutativity_1_1_0:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %edi, %esi
-; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %n0, %mask
;
; CHECK-BMI-LABEL: in_commutativity_1_1_1:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %edi, %esi
-; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: orl %esi, %eax
; CHECK-BMI-NEXT: retq
%n0 = xor i32 %x, %y
%n1 = and i32 %mask, %n0 ; swapped
; CHECK-BMI-LABEL: in_complex_y0:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %esi, %edi
; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%n0 = xor i32 %x, %y
; CHECK-BMI-LABEL: in_complex_y1:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
-; CHECK-BMI-NEXT: xorl %esi, %edi
; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%n0 = xor i32 %x, %y
; CHECK-BMI-LABEL: in_complex_m0:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: xorl %ecx, %edx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: andl %edi, %edx
+; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%mask = xor i32 %m_a, %m_b
%n0 = xor i32 %x, %y
; CHECK-BMI-LABEL: in_complex_m1:
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: xorl %ecx, %edx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %edx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: andl %edi, %edx
+; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%mask = xor i32 %m_a, %m_b
%n0 = xor i32 %x, %y
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: xorl %r8d, %ecx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: andl %edi, %ecx
+; CHECK-BMI-NEXT: orl %ecx, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: xorl %r8d, %ecx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: andl %edi, %ecx
+; CHECK-BMI-NEXT: orl %ecx, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: xorl %r8d, %ecx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: andl %edi, %ecx
+; CHECK-BMI-NEXT: orl %ecx, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b
; CHECK-BMI: # %bb.0:
; CHECK-BMI-NEXT: andl %edx, %esi
; CHECK-BMI-NEXT: xorl %r8d, %ecx
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: andl %ecx, %edi
-; CHECK-BMI-NEXT: xorl %esi, %edi
-; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: andl %edi, %ecx
+; CHECK-BMI-NEXT: orl %ecx, %eax
; CHECK-BMI-NEXT: retq
%y = and i32 %y_hi, %y_low
%mask = xor i32 %m_a, %m_b