[LSV] Vectorize up to side-effecting instructions.

author Justin Lebar <jlebar@google.com>

Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)

committer Justin Lebar <jlebar@google.com>

Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)
author Justin Lebar <jlebar@google.com>
Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)
committer Justin Lebar <jlebar@google.com>
Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

index ec3e734..674c451 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -439,13 +439,10 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
          ChainInstrs.push_back({&I, InstrIdx});
      } else if (I.mayHaveSideEffects()) {
        DEBUG(dbgs() << "LSV: Found side-effecting operation: " << I << '\n');
-      return 0;
+      break;
      }
    }
  
-  assert(Chain.size() == ChainInstrs.size() &&
-         "All instrs in Chain must be within range getBoundaryInstrs(Chain).");
-
    // Loop until we find an instruction in ChainInstrs that we can't vectorize.
    unsigned ChainInstrIdx, ChainInstrsLen;
    for (ChainInstrIdx = 0, ChainInstrsLen = ChainInstrs.size();
@@ -479,7 +476,6 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
          DEBUG({
            Value *Ptr0 = getPointerOperand(M0);
            Value *Ptr1 = getPointerOperand(M1);
-
            dbgs() << "LSV: Found alias:\n"
                      "  Aliasing instruction and pointer:\n"
                   << "  " << *MemInstr << '\n'
@@ -713,7 +709,7 @@ bool Vectorizer::vectorizeStoreChain(
  
    ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
    if (NewChain.empty()) {
-    // There exists a side effect instruction, no vectorization possible.
+    // No vectorization possible.
      InstructionsProcessed->insert(Chain.begin(), Chain.end());
      return false;
    }
@@ -867,7 +863,7 @@ bool Vectorizer::vectorizeLoadChain(
  
    ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain);
    if (NewChain.empty()) {
-    // There exists a side effect instruction, no vectorization possible.
+    // No vectorization possible.
      InstructionsProcessed->insert(Chain.begin(), Chain.end());
      return false;
    }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg

new file mode 100644 (file)

index 0000000..a5e90f8
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True  # tests in this directory need 'NVPTX' in the configured targets
+
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll

new file mode 100644 (file)

index 0000000..e521a00
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
@@ -0,0 +1,48 @@
+; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; If we have a chain of loads or stores with a side-effecting operation in the
+; middle, we should still be able to merge the loads/stores that appear
+; before/after the side-effecting op.  We just can't merge *across* the
+; side-effecting op.
+
+declare void @fn() #0
+
+; CHECK-LABEL: @merge_stores
+; CHECK: store <2 x i32> <i32 100, i32 101>
+; CHECK: call void @fn()
+; CHECK: store <2 x i32> <i32 102, i32 103>
+define void @merge_stores(i32* %out) #0 {
+  %out.gep.1 = getelementptr i32, i32* %out, i32 1
+  %out.gep.2 = getelementptr i32, i32* %out, i32 2
+  %out.gep.3 = getelementptr i32, i32* %out, i32 3
+  ; Stores to out[1] and out[0] precede the call; stores to out[2] and out[3] follow it.
+  store i32 101, i32* %out.gep.1
+  store i32 100, i32* %out
+  call void @fn()
+  store i32 102, i32* %out.gep.2
+  store i32 103, i32* %out.gep.3
+  ret void
+}
+
+; CHECK-LABEL: @merge_loads
+; CHECK: load <2 x i32>
+; CHECK: call void @fn()
+; CHECK: load <2 x i32>
+define i32 @merge_loads(i32* %in) #0 {
+  %in.gep.1 = getelementptr i32, i32* %in, i32 1
+  %in.gep.2 = getelementptr i32, i32* %in, i32 2
+  %in.gep.3 = getelementptr i32, i32* %in, i32 3
+  ; Loads of in[0] and in[1] precede the call; loads of in[2] and in[3] follow it.
+  %v1 = load i32, i32* %in
+  %v2 = load i32, i32* %in.gep.1
+  call void @fn()
+  %v3 = load i32, i32* %in.gep.2
+  %v4 = load i32, i32* %in.gep.3
+  ; Return the sum of all four values so that every load has a live use.
+  %sum1 = add i32 %v1, %v2
+  %sum2 = add i32 %sum1, %v3
+  %sum3 = add i32 %sum2, %v4
+  ret i32 %sum3
+}
+
+attributes #0 = { nounwind }
author	Justin Lebar <jlebar@google.com>
	Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)
committer	Justin Lebar <jlebar@google.com>
	Wed, 20 Jul 2016 20:07:34 +0000 (20:07 +0000)
llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp		patch \| blob \| history
llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg	[new file with mode: 0644]	patch \| blob
llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll	[new file with mode: 0644]	patch \| blob