[PPC64LE] Adjust vector splats during VSX swap optimization

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp

index a1dc7f7..6aa25ff 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -88,7 +88,6 @@ struct PPCVSXSwapEntry {
  
  enum SHValues {
    SH_NONE = 0,
-  SH_BUILDVEC,
    SH_EXTRACT,
    SH_INSERT,
    SH_NOSWAP_LD,
@@ -329,7 +328,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
          // Splats are lane-sensitive, but we can use special handling
          // to adjust the source lane for the splat.  That adjustment is
          // done in handleSpecialSwappables, so mark the splat swappable:
-        //        SwapVector[VecIdx].IsSwappable = 1;
+        SwapVector[VecIdx].IsSwappable = 1;
          SwapVector[VecIdx].SpecialHandling = SHValues::SH_SPLAT;
          break;
        // The presence of the following lane-sensitive operations in a
@@ -662,8 +661,12 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
        }
  
      } else if (SwapVector[EntryIdx].IsSwappable &&
-               SwapVector[EntryIdx].SpecialHandling != 0)
-      handleSpecialSwappables(EntryIdx);
+               SwapVector[EntryIdx].SpecialHandling != 0) {
+      int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId);
+
+      if (!SwapVector[Repr].WebRejected)
+        handleSpecialSwappables(EntryIdx);
+    }
    }
  }
  
@@ -672,6 +675,39 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
  // here.
  // FIXME: This code is to be phased in with subsequent patches.
  void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
+  switch (SwapVector[EntryIdx].SpecialHandling) {
+
+  default:
+    assert(false && "Unexpected special handling type");
+    break;
+
+  // For splats based on an index into a vector, add N/2 modulo N
+  // to the index, where N is the number of vector elements.
+  case SHValues::SH_SPLAT: {
+    MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
+    unsigned NElts;
+
+    DEBUG(dbgs() << "Changing splat: ");
+    DEBUG(MI->dump());
+
+    switch (MI->getOpcode()) {
+    default:
+      assert(false && "Unexpected splat opcode");
+    case PPC::VSPLTB: NElts = 16; break;
+    case PPC::VSPLTH: NElts = 8;  break;
+    case PPC::VSPLTW: NElts = 4;  break;
+    }
+
+    unsigned EltNo = MI->getOperand(1).getImm();
+    EltNo = (EltNo + NElts / 2) % NElts;
+    MI->getOperand(1).setImm(EltNo);
+
+    DEBUG(dbgs() << "  Into: ");
+    DEBUG(MI->dump());
+    break;
+  }
+
+  }
  }
  
  // Walk the swap vector and replace each entry marked for removal with
@@ -734,9 +770,6 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
          break;
        case SH_NONE:
          break;
-      case SH_BUILDVEC:
-        DEBUG(dbgs() << "special:buildvec ");
-        break;
        case SH_EXTRACT:
          DEBUG(dbgs() << "special:extract ");
          break;
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-2.ll b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll

new file mode 100644 (file)

index 0000000..08096ed
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-2.ll
@@ -0,0 +1,91 @@
+; RUN: llc -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Test swap removal when a vector splat must be adjusted to make it legal.
+;
+; Test generated from the following C code:
+;
+; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+; vector char vcr;
+; vector short vs = {0, 1, 2, 3, 4, 5, 6, 7};
+; vector short vsr;
+; vector int vi = {0, 1, 2, 3};
+; vector int vir;
+;
+; void cfoo ()
+; {
+;   vcr = (vector char){vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5],
+;                       vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5]};
+; }
+;
+; void sfoo ()
+; {
+;   vsr = (vector short){vs[6], vs[6], vs[6], vs[6],
+;                        vs[6], vs[6], vs[6], vs[6]};
+; }
+;
+; void ifoo ()
+; {
+;   vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
+; }
+
+@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@vcr = common global <16 x i8> zeroinitializer, align 16
+@vsr = common global <8 x i16> zeroinitializer, align 16
+@vir = common global <4 x i32> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @cfoo() {
+entry:
+  %0 = load <16 x i8>, <16 x i8>* @vc, align 16
+  %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sfoo() {
+entry:
+  %0 = load <8 x i16>, <8 x i16>* @vs, align 16
+  %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ifoo() {
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @vi, align 16
+  %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16
+  ret void
+}
+
+; Justification:
+;  Byte splat of element 5 (BE) becomes element 15-5 = 10 (LE)
+;  which becomes (10+8)%16 = 2 (LE swapped).
+;
+;  Halfword splat of element 6 (BE) becomes element 7-6 = 1 (LE)
+;  which becomes (1+4)%8 = 5 (LE swapped).
+;
+;  Word splat of element 1 (BE) becomes element 3-1 = 2 (LE)
+;  which becomes (2+2)%4 = 0 (LE swapped).
+
+; CHECK-NOT: xxpermdi
+; CHECK-NOT: xxswapd
+
+; CHECK-LABEL: @cfoo
+; CHECK: lxvd2x
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 2
+; CHECK: stxvd2x
+
+; CHECK-LABEL: @sfoo
+; CHECK: lxvd2x
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 5
+; CHECK: stxvd2x
+
+; CHECK-LABEL: @ifoo
+; CHECK: lxvd2x
+; CHECK: vspltw {{[0-9]+}}, {{[0-9]+}}, 0
+; CHECK: stxvd2x
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Wed, 6 May 2015 15:40:46 +0000 (15:40 +0000)
llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp		patch \| blob \| history
llvm/test/CodeGen/PowerPC/swaps-le-2.ll	[new file with mode: 0644]	patch \| blob