From: Nicolas Vasilache
Date: Wed, 27 Mar 2019 21:12:01 +0000 (-0700)
Subject: Give the Vectorize pass a virtualVectorSize argument.
X-Git-Tag: llvmorg-11-init~1466^2~2106
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c3742d20b5210f6b44b4be89b6080b4a24d6ede7;p=platform%2Fupstream%2Fllvm.git

Give the Vectorize pass a virtualVectorSize argument.

This CL allows vectorization to be called and configured in other ways
than just via command-line arguments, which makes it possible to trigger
vectorization programmatically.
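For illustration, a minimal sketch of the programmatic path this CL opens
up (it mirrors the EDSC test added below; `module` is assumed to be a
mlir::Module* built elsewhere):

    #include "llvm/ADT/SmallVector.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Transforms/Passes.h"

    // Vectorize to a 4x4 virtual vector shape, with no dependence on the
    // -virtual-vector-size command-line flag.
    mlir::PassManager pm;
    llvm::SmallVector<int64_t, 4> vectorSizes{4, 4};
    pm.addPass(mlir::createVectorizePass(vectorSizes));
    if (succeeded(pm.run(module)))
      module->print(llvm::outs());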
PiperOrigin-RevId: 240638208
---

diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 634f690..52b3a0f 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -44,7 +44,8 @@ FunctionPassBase *createCSEPass();
 
 /// Creates a pass to vectorize loops, operations and data types using a
 /// target-independent, n-D super-vector abstraction.
-FunctionPassBase *createVectorizePass();
+FunctionPassBase *
+createVectorizePass(llvm::ArrayRef<int64_t> virtualVectorSize);
 
 /// Creates a pass to allow independent testing of vectorizer functionality with
 /// FileCheck.
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index e720e19..50f51d1 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -236,10 +236,8 @@ static bool isContiguousAccess(Value &iv, LoadOrStoreOp memoryOp,
                     std::is_same<LoadOrStoreOp, StoreOp>::value,
                 "Must be called on either const LoadOp & or const StoreOp &");
   auto memRefType = memoryOp.getMemRefType();
-  if (fastestVaryingDim >= memRefType.getRank()) {
-    memoryOp.emitError("fastest varying dim out of bounds");
+  if (fastestVaryingDim >= memRefType.getRank())
     return false;
-  }
   auto layoutMap = memRefType.getAffineMaps();
   // TODO(ntv): remove dependence on Builder once we support non-identity
@@ -335,15 +333,14 @@ static bool isVectorizableLoopWithCond(AffineForOp loop,
 
 bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
     AffineForOp loop, unsigned fastestVaryingDim) {
-  VectorizableInstFun fun(
-      [fastestVaryingDim](AffineForOp loop, Operation &op) {
-        auto load = op.dyn_cast<LoadOp>();
-        auto store = op.dyn_cast<StoreOp>();
-        return load ? isContiguousAccess(*loop.getInductionVar(), load,
-                                         fastestVaryingDim)
-                    : isContiguousAccess(*loop.getInductionVar(), store,
-                                         fastestVaryingDim);
-      });
+  VectorizableInstFun fun([fastestVaryingDim](AffineForOp loop, Operation &op) {
+    auto load = op.dyn_cast<LoadOp>();
+    auto store = op.dyn_cast<StoreOp>();
+    return load ? isContiguousAccess(*loop.getInductionVar(), load,
+                                     fastestVaryingDim)
+                : isContiguousAccess(*loop.getInductionVar(), store,
+                                     fastestVaryingDim);
+  });
   return isVectorizableLoopWithCond(loop, fun);
 }
 
diff --git a/mlir/lib/Transforms/Vectorize.cpp b/mlir/lib/Transforms/Vectorize.cpp
index 98e4053..6c6ac45 100644
--- a/mlir/lib/Transforms/Vectorize.cpp
+++ b/mlir/lib/Transforms/Vectorize.cpp
@@ -552,7 +552,7 @@ static llvm::cl::OptionCategory clOptionsCategory("vectorize options");
 
 static llvm::cl::list<int> clVirtualVectorSize(
     "virtual-vector-size",
-    llvm::cl::desc("Specify n-D virtual vector size for early vectorization"),
+    llvm::cl::desc("Specify n-D virtual vector size for vectorization"),
     llvm::cl::ZeroOrMore, llvm::cl::cat(clOptionsCategory));
 
 static llvm::cl::list<int> clFastestVaryingPattern(
@@ -652,7 +652,25 @@ static std::vector<NestedPattern> makePatterns() {
 
 namespace {
 
 struct Vectorize : public FunctionPass<Vectorize> {
+  Vectorize() {
+    if (!clVirtualVectorSize.empty()) {
+      vectorSizes.reserve(clVirtualVectorSize.size());
+      this->vectorSizes.assign(clVirtualVectorSize.begin(),
+                               clVirtualVectorSize.end());
+    }
+  }
+  Vectorize(ArrayRef<int64_t> virtualVectorSize) {
+    if (clVirtualVectorSize.empty()) {
+      this->vectorSizes.assign(virtualVectorSize.begin(),
+                               virtualVectorSize.end());
+    } else {
+      vectorSizes.reserve(clVirtualVectorSize.size());
+      this->vectorSizes.assign(clVirtualVectorSize.begin(),
+                               clVirtualVectorSize.end());
+    }
+  }
   void runOnFunction() override;
+  SmallVector<int64_t, 4> vectorSizes;
 };
 
 } // end anonymous namespace
@@ -1236,8 +1254,7 @@ void Vectorize::runOnFunction() {
   for (auto m : matches) {
     VectorizationStrategy strategy;
     // TODO(ntv): depending on profitability, elect to reduce the vector size.
-    strategy.vectorSizes.assign(clVirtualVectorSize.begin(),
-                                clVirtualVectorSize.end());
+    strategy.vectorSizes.assign(vectorSizes.begin(), vectorSizes.end());
     if (failed(analyzeProfitability(m.getMatchedChildren(), 1, patternDepth,
                                     &strategy))) {
       continue;
@@ -1253,7 +1270,10 @@ void Vectorize::runOnFunction() {
   LLVM_DEBUG(dbgs() << "\n");
 }
 
-FunctionPassBase *mlir::createVectorizePass() { return new Vectorize(); }
+FunctionPassBase *
+mlir::createVectorizePass(llvm::ArrayRef<int64_t> virtualVectorSize) {
+  return new Vectorize(virtualVectorSize);
+}
 
 static PassRegistration<Vectorize>
     pass("vectorize",
diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp
index ec8ceb4..b3b0f8c 100644
--- a/mlir/test/EDSC/builder-api-test.cpp
+++ b/mlir/test/EDSC/builder-api-test.cpp
@@ -27,8 +27,10 @@
 #include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/Types.h"
 #include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
 #include "mlir/StandardOps/Ops.h"
 #include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
 
 #include "Test.h"
@@ -528,6 +530,56 @@ TEST_FUNC(tile_2d) {
   f->print(llvm::outs());
 }
 
+// Inject an EDSC-constructed computation to exercise 2-d vectorization.
+TEST_FUNC(vectorize_2d) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+  using namespace edsc::op;
+  auto memrefType =
+      MemRefType::get({-1, -1, -1}, FloatType::getF32(&globalContext()), {}, 0);
+  auto owningF =
+      makeFunction("vectorize_2d", {}, {memrefType, memrefType, memrefType});
+
+  mlir::Function *f = owningF.release();
+  mlir::Module module(&globalContext());
+  module.getFunctions().push_back(f);
+
+  ScopedContext scope(f);
+  ValueHandle zero = constant_index(0);
+  MemRefView vA(f->getArgument(0)), vB(f->getArgument(1)),
+      vC(f->getArgument(2));
+  IndexedValue A(f->getArgument(0)), B(f->getArgument(1)), C(f->getArgument(2));
+  IndexHandle M(vA.ub(0)), N(vA.ub(1)), P(vA.ub(2));
+
+  // clang-format off
+  IndexHandle i, j, k;
+  LoopNestBuilder({&i, &j, &k}, {zero, zero, zero}, {M, N, P}, {1, 1, 1})({
+    C(i, j, k) = A(i, j, k) + B(i, j, k)
+  });
+  ret();
+
+  // CHECK-LABEL: func @vectorize_2d
+  // CHECK-NEXT: %[[M:.*]] = dim %arg0, 0 : memref<?x?x?xf32>
+  // CHECK-NEXT: %[[N:.*]] = dim %arg0, 1 : memref<?x?x?xf32>
+  // CHECK-NEXT: %[[P:.*]] = dim %arg0, 2 : memref<?x?x?xf32>
+  // CHECK-NEXT: affine.for %i0 = 0 to (d0) -> (d0)(%[[M]]) {
+  // CHECK-NEXT: affine.for %i1 = 0 to (d0) -> (d0)(%[[N]]) step 4 {
+  // CHECK-NEXT: affine.for %i2 = 0 to (d0) -> (d0)(%[[P]]) step 4 {
+  // CHECK-NEXT: %[[vA:.*]] = "vector_transfer_read"(%arg1, %i0, %i1, %i2) {permutation_map: (d0, d1, d2) -> (d1, d2)} : (memref<?x?x?xf32>, index, index, index) -> vector<4x4xf32>
+  // CHECK-NEXT: %[[vB:.*]] = "vector_transfer_read"(%arg0, %i0, %i1, %i2) {permutation_map: (d0, d1, d2) -> (d1, d2)} : (memref<?x?x?xf32>, index, index, index) -> vector<4x4xf32>
+  // CHECK-NEXT: %[[vRES:.*]] = addf %[[vB]], %[[vA]] : vector<4x4xf32>
+  // CHECK-NEXT: "vector_transfer_write"(%[[vRES:.*]], %arg2, %i0, %i1, %i2) {permutation_map: (d0, d1, d2) -> (d1, d2)} : (vector<4x4xf32>, memref<?x?x?xf32>, index, index, index) -> ()
+  // clang-format on
+
+  mlir::PassManager pm;
+  pm.addPass(mlir::createCanonicalizerPass());
+  SmallVector<int64_t, 4> vectorSizes{4, 4};
+  pm.addPass(mlir::createVectorizePass(vectorSizes));
+  auto result = pm.run(f->getModule());
+  if (succeeded(result))
+    f->print(llvm::outs());
+}
+
 int main() {
   RUN_TESTS();
   return 0;
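Note: as the two Vectorize constructors above show, a non-empty
-virtual-vector-size on the command line still overrides the value passed to
createVectorizePass(). Since the flag is a cl::list registered with
ZeroOrMore, each occurrence supplies one vector dimension, e.g. something
like (the input file name is illustrative):

    mlir-opt -vectorize -virtual-vector-size 4 -virtual-vector-size 4 input.mlir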