From 62b03e344eeb168d0ac87a442275b28d3e95016a Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 20 Jul 2016 20:07:34 +0000 Subject: [PATCH] [LSV] Vectorize up to side-effecting instructions. Summary: Previously if we had a chain that contained a side-effecting instruction, we wouldn't vectorize it at all. Now we'll vectorize everything that comes before the side-effecting instruction. Reviewers: asbirlea Subscribers: arsenm, jholewinski, llvm-commits, mzolotukhin Differential Revision: https://reviews.llvm.org/D22536 llvm-svn: 276170 --- .../Transforms/Vectorize/LoadStoreVectorizer.cpp | 10 ++--- .../LoadStoreVectorizer/NVPTX/lit.local.cfg | 3 ++ .../NVPTX/merge-across-side-effects.ll | 48 ++++++++++++++++++++++ 3 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg create mode 100644 llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index ec3e734..674c451 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -439,13 +439,10 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) { ChainInstrs.push_back({&I, InstrIdx}); } else if (I.mayHaveSideEffects()) { DEBUG(dbgs() << "LSV: Found side-effecting operation: " << I << '\n'); - return 0; + break; } } - assert(Chain.size() == ChainInstrs.size() && - "All instrs in Chain must be within range getBoundaryInstrs(Chain)."); - // Loop until we find an instruction in ChainInstrs that we can't vectorize. 
unsigned ChainInstrIdx, ChainInstrsLen; for (ChainInstrIdx = 0, ChainInstrsLen = ChainInstrs.size(); @@ -479,7 +476,6 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) { DEBUG({ Value *Ptr0 = getPointerOperand(M0); Value *Ptr1 = getPointerOperand(M1); - dbgs() << "LSV: Found alias:\n" " Aliasing instruction and pointer:\n" << " " << *MemInstr << '\n' @@ -713,7 +709,7 @@ bool Vectorizer::vectorizeStoreChain( ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain); if (NewChain.empty()) { - // There exists a side effect instruction, no vectorization possible. + // No vectorization possible. InstructionsProcessed->insert(Chain.begin(), Chain.end()); return false; } @@ -867,7 +863,7 @@ bool Vectorizer::vectorizeLoadChain( ArrayRef<Value *> NewChain = getVectorizablePrefix(Chain); if (NewChain.empty()) { - // There exists a side effect instruction, no vectorization possible. + // No vectorization possible. InstructionsProcessed->insert(Chain.begin(), Chain.end()); return false; } diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg new file mode 100644 index 0000000..a5e90f8 --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'NVPTX' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll new file mode 100644 index 0000000..e521a00 --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll @@ -0,0 +1,48 @@ +; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s + +; If we have a chain of loads or stores with a side-effecting operation in the +; middle, we should still be able to merge the loads/stores that appear +; before/after the side-effecting op. We just can't merge *across* the +; side-effecting op. 
+ +declare void @fn() #0 + +; CHECK-LABEL: @merge_stores +; CHECK: store <2 x i32> +; CHECK: call void @fn() +; CHECK: store <2 x i32> +define void @merge_stores(i32* %out) #0 { + %out.gep.1 = getelementptr i32, i32* %out, i32 1 + %out.gep.2 = getelementptr i32, i32* %out, i32 2 + %out.gep.3 = getelementptr i32, i32* %out, i32 3 + + store i32 101, i32* %out.gep.1 + store i32 100, i32* %out + call void @fn() + store i32 102, i32* %out.gep.2 + store i32 103, i32* %out.gep.3 + ret void +} + +; CHECK-LABEL: @merge_loads +; CHECK: load <2 x i32> +; CHECK: call void @fn() +; CHECK: load <2 x i32> +define i32 @merge_loads(i32* %in) #0 { + %in.gep.1 = getelementptr i32, i32* %in, i32 1 + %in.gep.2 = getelementptr i32, i32* %in, i32 2 + %in.gep.3 = getelementptr i32, i32* %in, i32 3 + + %v1 = load i32, i32* %in + %v2 = load i32, i32* %in.gep.1 + call void @fn() + %v3 = load i32, i32* %in.gep.2 + %v4 = load i32, i32* %in.gep.3 + + %sum1 = add i32 %v1, %v2 + %sum2 = add i32 %sum1, %v3 + %sum3 = add i32 %sum2, %v4 + ret i32 %v4 +} + +attributes #0 = { nounwind } -- 2.7.4