[DAG] add splat vector support for 'or' in SimplifyDemandedBits

author Sanjay Patel <spatel@rotateright.com>

Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 9f50bfe7bfbd9a81303e74a47b4aa454376f26e6..66afc905ca0000374f4dc22d60fc3bc7529936c5 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      return Load;
  
    // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
  
    return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll

index e4cf296432ba99354bbd43418065ee137db6aa32..d7f52d2479885781e11936a4145c2005dcc859f1 100644 (file)
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
    ret <4 x i32> %or
  }
  
+; TODO: Why would we do this?
  ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  
  define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
  ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
  ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
  ; CHECK-NEXT:    retq
-  %1 = and <2 x i64> %a0, <i64 1, i64 1>
+  %1 = and <2 x i64> %a0, <i64 7, i64 7>
    %2 = or <2 x i64> %1, <i64 3, i64 3>
    ret <2 x i64> %2
  }
  
+; If all masked bits are going to be set, that's a constant fold.
+
  define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
  ; CHECK-LABEL: or_and_v4i32:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
  ; CHECK-NEXT:    retq
    %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
    %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
  define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
  ; CHECK-LABEL: or_zext_v2i32:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
  ; CHECK-NEXT:    retq
    %1 = zext <2 x i32> %a0 to <2 x i64>
    %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
  define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
  ; CHECK-LABEL: or_zext_v4i16:
  ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
  ; CHECK-NEXT:    retq
    %1 = zext <4 x i16> %a0 to <4 x i32>
    %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll

index 9626d64847fe59cb9e734707703a3f0644235339..3da1a360e2904060bfb8884704b8a682711ab29c 100644 (file)
--- a/llvm/test/CodeGen/X86/i64-to-float.ll
+++ b/llvm/test/CodeGen/X86/i64-to-float.ll
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
  ; X64-SSE-NEXT:    pandn %xmm0, %xmm2
  ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm3
  ; X64-SSE-NEXT:    por %xmm2, %xmm3
-; X64-SSE-NEXT:    movdqa %xmm3, %xmm0
-; X64-SSE-NEXT:    pxor %xmm1, %xmm0
-; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT:    por %xmm2, %xmm1
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X64-SSE-NEXT:    pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT:    pxor %xmm3, %xmm1
+; X64-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X64-SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
  ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm1
  ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT:    pand %xmm5, %xmm0
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT:    pand %xmm4, %xmm0
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
  ; X64-SSE-NEXT:    por %xmm0, %xmm1
  ; X64-SSE-NEXT:    movdqa %xmm1, %xmm0
  ; X64-SSE-NEXT:    pandn %xmm3, %xmm0
-; X64-SSE-NEXT:    pand %xmm2, %xmm1
+; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm1
  ; X64-SSE-NEXT:    por %xmm0, %xmm1
  ; X64-SSE-NEXT:    movd %xmm1, %rax
  ; X64-SSE-NEXT:    xorps %xmm0, %xmm0
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 19 Apr 2017 22:00:00 +0000 (22:00 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/test/CodeGen/X86/combine-or.ll		patch | blob | history
llvm/test/CodeGen/X86/i64-to-float.ll		patch | blob | history