Additional fixes for bug 15155.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

index 01d731a80957e35fb129083c75552216c11cb54b..14535065d55c487f9b7f61fb717f70fe3f89399c 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1323,34 +1323,75 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                                    SDValue(Tmp, 0), GA);
    }
    case PPCISD::VADD_SPLAT: {
-    // Convert: VADD_SPLAT elt, size
-    // Into:    tmp = VSPLTIS[BHW] elt
-    //          VADDU[BHW]M tmp, tmp
-    // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
+    // This expands into one of three sequences, depending on whether
+    // the first operand is odd or even, positive or negative.
      assert(isa<ConstantSDNode>(N->getOperand(0)) &&
             isa<ConstantSDNode>(N->getOperand(1)) &&
             "Invalid operand on VADD_SPLAT!");
+
+    int Elt     = N->getConstantOperandVal(0);
      int EltSize = N->getConstantOperandVal(1);
-    unsigned Opc1, Opc2;
+    unsigned Opc1, Opc2, Opc3;
      EVT VT;
+
      if (EltSize == 1) {
        Opc1 = PPC::VSPLTISB;
        Opc2 = PPC::VADDUBM;
+      Opc3 = PPC::VSUBUBM;
        VT = MVT::v16i8;
      } else if (EltSize == 2) {
        Opc1 = PPC::VSPLTISH;
        Opc2 = PPC::VADDUHM;
+      Opc3 = PPC::VSUBUHM;
        VT = MVT::v8i16;
      } else {
        assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
        Opc1 = PPC::VSPLTISW;
        Opc2 = PPC::VADDUWM;
+      Opc3 = PPC::VSUBUWM;
        VT = MVT::v4i32;
      }
-    SDValue Elt = getI32Imm(N->getConstantOperandVal(0));
-    SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, Elt);
-    SDValue TmpVal = SDValue(Tmp, 0);
-    return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+    if ((Elt & 1) == 0) {
+      // Elt is even, in the range [-32,-18] + [16,30].
+      //
+      // Convert: VADD_SPLAT elt, size
+      // Into:    tmp = VSPLTIS[BHW] elt
+      //          VADDU[BHW]M tmp, tmp
+      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
+      SDValue EltVal = getI32Imm(Elt >> 1);
+      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+      SDValue TmpVal = SDValue(Tmp, 0);
+      return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+    } else if (Elt > 0) {
+      // Elt is odd and positive, in the range [17,31].
+      //
+      // Convert: VADD_SPLAT elt, size
+      // Into:    tmp1 = VSPLTIS[BHW] elt-16
+      //          tmp2 = VSPLTIS[BHW] -16
+      //          VSUBU[BHW]M tmp1, tmp2
+      SDValue EltVal = getI32Imm(Elt - 16);
+      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+      EltVal = getI32Imm(-16);
+      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+      return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+                                    SDValue(Tmp2, 0));
+
+    } else {
+      // Elt is odd and negative, in the range [-31,-17].
+      //
+      // Convert: VADD_SPLAT elt, size
+      // Into:    tmp1 = VSPLTIS[BHW] elt+16
+      //          tmp2 = VSPLTIS[BHW] -16
+      //          VADDU[BHW]M tmp1, tmp2
+      SDValue EltVal = getI32Imm(Elt + 16);
+      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+      EltVal = getI32Imm(-16);
+      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+      return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+                                    SDValue(Tmp2, 0));
+    }
    }
    }
  
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index 338d73f0b5ff124ba461300cfa72af24719384d6..6d2aacd9047a7d2d309fd51d905bae945e71b6e9 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5025,11 +5025,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
    // Two instruction sequences.
  
    // If this value is in the range [-32,30] and is even, use:
-  //    tmp = VSPLTI[bhw], result = add tmp, tmp
-  if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
-    // To avoid having the optimization undone by constant folding, we
-    // convert to a pseudo that will be expanded later.
-    SDValue Elt = DAG.getConstant(SextVal >> 1, MVT::i32);
+  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+  // If this value is in the range [17,31] and is odd, use:
+  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+  // If this value is in the range [-31,-17] and is odd, use:
+  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+  // Note the last two are three-instruction sequences.
+  if (SextVal >= -32 && SextVal <= 31) {
+    // To avoid having these optimizations undone by constant folding,
+    // we convert to a pseudo that will be expanded later into one of
+    // the above forms.
+    SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
      EVT VT = Op.getValueType();
      int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
      SDValue EltSize = DAG.getConstant(Size, MVT::i32);
@@ -5129,25 +5135,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
      }
    }
  
-  // Three instruction sequences.
-
-  // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
-  // FIXME: Disabled because the add gets constant folded.
-  if (0 && SextVal >= 0 && SextVal <= 31) {
-    SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
-    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
-    LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
-    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
-  }
-  // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
-  // FIXME: Disabled because the add gets constant folded.
-  if (0 && SextVal >= -31 && SextVal <= 0) {
-    SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
-    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
-    LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
-    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS);
-  }
-
    return SDValue();
  }
  
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h

index 7cc2d1ac322d96d25a19779eb2efb3d0bde7e670..1fa88f32523fb39212396b85d12d1c910c6e9839 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -238,8 +238,9 @@ namespace llvm {
        ADDI_DTPREL_L,
  
        /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
-      /// into an ADD of a VSPLTI with itself during instruction selection.
-      /// Necessary to avoid losing this optimization due to constant folds.
+      /// during instruction selection to optimize a BUILD_VECTOR into
+      /// operations on splats.  This is necessary to avoid losing these
+      /// optimizations due to constant folding.
        VADD_SPLAT,
  
        /// STD_32 - This is the STD instruction for use with "32-bit" registers.
diff --git a/llvm/test/CodeGen/PowerPC/vaddsplat.ll b/llvm/test/CodeGen/PowerPC/vaddsplat.ll

index b4c16c1bf91d7927f02f18c60f57ca7d34012848..e65148aff03acedd3891609f9c98482ba8e41a7d 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/vaddsplat.ll
+++ b/llvm/test/CodeGen/PowerPC/vaddsplat.ll
@@ -1,6 +1,6 @@
  ; RUN: llc -O0 -mcpu=pwr7 <%s | FileCheck %s
  
-; Test optimization of build_vector into vadd/vsplt for 6-bit immediates.
+; Test optimizations of build_vector for 6-bit immediates.
  
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
@@ -9,69 +9,141 @@ target triple = "powerpc64-unknown-linux-gnu"
  %v8i16 = type <8 x i16>
  %v16i8 = type <16 x i8>
  
-define void @test_v4i32_pos(%v4i32* %P, %v4i32* %S) {
+define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
         %p = load %v4i32* %P
         %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
         store %v4i32 %r, %v4i32* %S
         ret void
  }
  
-; CHECK: test_v4i32_pos:
+; CHECK: test_v4i32_pos_even:
  ; CHECK: vspltisw [[REG1:[0-9]+]], 9
  ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
  
-define void @test_v4i32_neg(%v4i32* %P, %v4i32* %S) {
+define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
         %p = load %v4i32* %P
         %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
         store %v4i32 %r, %v4i32* %S
         ret void
  }
  
-; CHECK: test_v4i32_neg:
+; CHECK: test_v4i32_neg_even:
  ; CHECK: vspltisw [[REG1:[0-9]+]], -14
  ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
  
-define void @test_v8i16_pos(%v8i16* %P, %v8i16* %S) {
+define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
         %p = load %v8i16* %P
         %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
         store %v8i16 %r, %v8i16* %S
         ret void
  }
  
-; CHECK: test_v8i16_pos:
+; CHECK: test_v8i16_pos_even:
  ; CHECK: vspltish [[REG1:[0-9]+]], 15
  ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
  
-define void @test_v8i16_neg(%v8i16* %P, %v8i16* %S) {
+define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
         %p = load %v8i16* %P
         %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
         store %v8i16 %r, %v8i16* %S
         ret void
  }
  
-; CHECK: test_v8i16_neg:
+; CHECK: test_v8i16_neg_even:
  ; CHECK: vspltish [[REG1:[0-9]+]], -16
  ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
  
-define void @test_v16i8_pos(%v16i8* %P, %v16i8* %S) {
+define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
         %p = load %v16i8* %P
         %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
         store %v16i8 %r, %v16i8* %S
         ret void
  }
  
-; CHECK: test_v16i8_pos:
+; CHECK: test_v16i8_pos_even:
  ; CHECK: vspltisb [[REG1:[0-9]+]], 8
  ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
  
-define void @test_v16i8_neg(%v16i8* %P, %v16i8* %S) {
+define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
         %p = load %v16i8* %P
         %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
         store %v16i8 %r, %v16i8* %S
         ret void
  }
  
-; CHECK: test_v16i8_neg:
+; CHECK: test_v16i8_neg_even:
  ; CHECK: vspltisb [[REG1:[0-9]+]], -9
  ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
  
+define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_pos_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], 11
+; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
+       %p = load %v4i32* %P
+       %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
+       store %v4i32 %r, %v4i32* %S
+       ret void
+}
+
+; CHECK: test_v4i32_neg_odd:
+; CHECK: vspltisw [[REG2:[0-9]+]], -16
+; CHECK: vspltisw [[REG1:[0-9]+]], -11
+; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_pos_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], 15
+; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
+       %p = load %v8i16* %P
+       %r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
+       store %v8i16 %r, %v8i16* %S
+       ret void
+}
+
+; CHECK: test_v8i16_neg_odd:
+; CHECK: vspltish [[REG2:[0-9]+]], -16
+; CHECK: vspltish [[REG1:[0-9]+]], -15
+; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_pos_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], 1
+; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
+
+define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
+       %p = load %v16i8* %P
+       %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
+       store %v16i8 %r, %v16i8* %S
+       ret void
+}
+
+; CHECK: test_v16i8_neg_odd:
+; CHECK: vspltisb [[REG2:[0-9]+]], -16
+; CHECK: vspltisb [[REG1:[0-9]+]], -1
+; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG2]]
+
diff --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll

index 71aa35920c82277fbb5e6a1c0f96cc3c20319257..56b41b0f895582954846d2a3a75be6b71325adfd 100644 (file)
--- a/llvm/test/CodeGen/PowerPC/vec_constants.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,5 +1,4 @@
  ; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
-; XFAIL: *
  
  define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
         %tmp = load <4 x i32>* %P1              ; <<4 x i32>> [#uses=1]
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 20 Feb 2013 20:41:42 +0000 (20:41 +0000)
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp		patch | blob | history
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
llvm/lib/Target/PowerPC/PPCISelLowering.h		patch | blob | history
llvm/test/CodeGen/PowerPC/vaddsplat.ll		patch | blob | history
llvm/test/CodeGen/PowerPC/vec_constants.ll		patch | blob | history