return SDValue();
}
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(
+    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+  EVT VT = N0.getValueType();
+  if (N0.getOpcode() == ISD::AND) {
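+    // Note: both folds below rely on the identity (X & ~Y) | Y == X | Y;
+    // every bit that the mask ~Y clears from X is set again by Y, so the
+    // AND is redundant under the OR.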
+    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+    if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+    if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+  }
+
+  return SDValue();
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
}
}
+  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+    return Combined;
+  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+    return Combined;
+
  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
; CHECK-NEXT: movi v1.4s, #42
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, %a
; CHECK-NEXT: mvni v2.4s, #42
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, %a
; CHECK-NEXT: dup v2.2d, x9
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <4 x i32> %x, %y
%c = icmp ugt <4 x i32> %x, %a
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%a = add <4 x i32> %x, %y
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%a = add <2 x i64> %x, %y
%c = icmp ugt <2 x i64> %x, %a
; CHECK-NEXT: mvn v2.16b, v1.16b
; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%noty = xor <2 x i64> %y, <i64 -1, i64 -1>
%a = add <2 x i64> %x, %y
define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: out_constant_varx_mone_invmask:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic w8, w0, w2
-; CHECK-NEXT: orr w0, w8, w2
+; CHECK-NEXT: orr w0, w0, w2
; CHECK-NEXT: ret
%notmask = xor i32 %mask, -1
%mx = and i32 %notmask, %x
define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
; CHECK-LABEL: out_constant_mone_vary:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic w8, w1, w2
-; CHECK-NEXT: orr w0, w2, w8
+; CHECK-NEXT: orr w0, w1, w2
; CHECK-NEXT: ret
%notmask = xor i32 %mask, -1
%mx = and i32 %mask, -1
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: out_constant_varx_mone_invmask:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: out_constant_mone_vary:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
%mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; CHECK-LABEL: in_constant_mone_vary:
; CHECK: // %bb.0:
-; CHECK-NEXT: bic v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
%n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
%n1 = and <4 x i32> %n0, %mask
define i32 @out_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_varx_mone_invmask:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movl %edx, %eax
-; CHECK-NOBMI-NEXT: notl %eax
-; CHECK-NOBMI-NEXT: andl %edi, %eax
+; CHECK-NOBMI-NEXT: movl %edi, %eax
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_varx_mone_invmask:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: andnl %edi, %edx, %eax
+; CHECK-BMI-NEXT: movl %edi, %eax
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) {
; CHECK-NOBMI-LABEL: out_constant_mone_vary:
; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movl %edx, %eax
-; CHECK-NOBMI-NEXT: notl %eax
-; CHECK-NOBMI-NEXT: andl %esi, %eax
+; CHECK-NOBMI-NEXT: movl %esi, %eax
; CHECK-NOBMI-NEXT: orl %edx, %eax
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI-LABEL: out_constant_mone_vary:
; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: movl %esi, %eax
; CHECK-BMI-NEXT: orl %edx, %eax
; CHECK-BMI-NEXT: retq
%notmask = xor i32 %mask, -1
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
-; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
-; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
+; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
+; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
+; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
-; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT: andnps (%rdi), %xmm0
-; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
+; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
-; CHECK-XOP-NEXT: vandnps (%rdi), %xmm0, %xmm1
-; CHECK-XOP-NEXT: vorps %xmm0, %xmm1, %xmm0
+; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
-; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
-; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
+; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
+; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
+; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
-; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
-; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
+; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
-; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
-; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
+; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rdx), %xmm1
-; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
-; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
+; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
-; CHECK-XOP-NEXT: vandnps (%rsi), %xmm0, %xmm1
-; CHECK-XOP-NEXT: vorps %xmm1, %xmm0, %xmm0
+; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
+; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
%x = load <4 x i32>, <4 x i32> *%px, align 16
%y = load <4 x i32>, <4 x i32> *%py, align 16