Teach DAG combine to fold vector add/sub when an operand is an all-zeros vector: (add x, 0) -> x, (add 0, x) -> x, (sub x, 0) -> x.

author Craig Topper <craig.topper@gmail.com>

Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)

committer Craig Topper <craig.topper@gmail.com>

Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)
author Craig Topper <craig.topper@gmail.com>
Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)
committer Craig Topper <craig.topper@gmail.com>
Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6a43337..e774c13 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1381,6 +1381,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (add x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
    }
  
    // fold (add x, undef) -> undef
@@ -1624,6 +1630,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (sub x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
    }
  
    // fold (sub x, x) -> 0
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll

index 6cd2761..2bde76e 100644 (file)
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,11 +43,11 @@ forbody:            ; preds = %forcond
         %mul171.i = fmul <4 x float> %add167.i, %sub140.i               ; <<4 x float>> [#uses=1]
         %add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >              ; <<4 x float>> [#uses=1]
         %bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>              ; <<4 x i32>> [#uses=1]
-       %andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
+       %andnps178.i = add <4 x i32> %bitcast176.i, <i32 1, i32 1, i32 1, i32 1>                ; <<4 x i32>> [#uses=1]
         %bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>           ; <<4 x float>> [#uses=1]
         %mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer             ; <<4 x float>> [#uses=1]
         %bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>              ; <<4 x i32>> [#uses=1]
-       %andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer             ; <<4 x i32>> [#uses=1]
+       %andnps192.i = add <4 x i32> %bitcast190.i, <i32 1, i32 1, i32 1, i32 1>                ; <<4 x i32>> [#uses=1]
         %xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >            ; <<4 x i32>> [#uses=1]
         %orps203.i = add <4 x i32> %andnps192.i, %xorps.i               ; <<4 x i32>> [#uses=1]
         %bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>             ; <<4 x float>> [#uses=1]
@@ -55,9 +55,9 @@ forbody:              ; preds = %forcond
         %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer               ; <<4 x float>> [#uses=1]
         %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind          ; <<4 x float>> [#uses=1]
         %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>          ; <<4 x i32>> [#uses=2]
-       %andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13               ; <<4 x i32>> [#uses=1]
+       %andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13          ; <<4 x i32>> [#uses=1]
         %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >              ; <<4 x i32>> [#uses=1]
-       %andnps.i17 = add <4 x i32> zeroinitializer, %not.i16           ; <<4 x i32>> [#uses=1]
+       %andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16              ; <<4 x i32>> [#uses=1]
         %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14                ; <<4 x i32>> [#uses=1]
         %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>             ; <<4 x float>> [#uses=1]
         %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind               ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/vec_zero.ll b/llvm/test/CodeGen/X86/vec_zero.ll

index 682a0df..c3ea0ad 100644 (file)
--- a/llvm/test/CodeGen/X86/vec_zero.ll
+++ b/llvm/test/CodeGen/X86/vec_zero.ll
@@ -13,7 +13,7 @@ define void @foo(<4 x float>* %P) {
  ; CHECK: pxor
  define void @bar(<4 x i32>* %P) {
          %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
-        %S = add <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
+        %S = sub <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
          store <4 x i32> %S, <4 x i32>* %P
          ret void
  }
author	Craig Topper <craig.topper@gmail.com>
	Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Mon, 10 Dec 2012 08:12:29 +0000 (08:12 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll		patch | blob | history
llvm/test/CodeGen/X86/vec_zero.ll		patch | blob | history