From 307dc7b236924b5eeb5bf46b725a67dcb41bcd89 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 11:57:50 +0200 Subject: [PATCH] [mlir][VectorOps] Clean up outdated comments. NFCI. While there - De-templatify code that can use function_ref - Make BoundCaptures usable when they're const - Address post-submit review comment (static function into global namespace) --- .../mlir/Dialect/StandardOps/EDSC/Builders.h | 18 +++--- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 73 ++++++---------------- 2 files changed, 26 insertions(+), 65 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h index 36df24f..ffb3ba3 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h @@ -20,10 +20,10 @@ namespace edsc { class BoundsCapture { public: unsigned rank() const { return lbs.size(); } - Value lb(unsigned idx) { return lbs[idx]; } - Value ub(unsigned idx) { return ubs[idx]; } - int64_t step(unsigned idx) { return steps[idx]; } - std::tuple range(unsigned idx) { + Value lb(unsigned idx) const { return lbs[idx]; } + Value ub(unsigned idx) const { return ubs[idx]; } + int64_t step(unsigned idx) const { return steps[idx]; } + std::tuple range(unsigned idx) const { return std::make_tuple(lbs[idx], ubs[idx], steps[idx]); } void swapRanges(unsigned i, unsigned j) { @@ -34,9 +34,9 @@ public: std::swap(steps[i], steps[j]); } - ArrayRef getLbs() { return lbs; } - ArrayRef getUbs() { return ubs; } - ArrayRef getSteps() { return steps; } + ArrayRef getLbs() const { return lbs; } + ArrayRef getUbs() const { return ubs; } + ArrayRef getSteps() const { return steps; } protected: SmallVector lbs; @@ -52,8 +52,6 @@ protected: class MemRefBoundsCapture : public BoundsCapture { public: explicit MemRefBoundsCapture(Value v); - MemRefBoundsCapture(const MemRefBoundsCapture &) = default; - MemRefBoundsCapture &operator=(const MemRefBoundsCapture &) = default; unsigned fastestVarying() const { return rank() - 1; } @@ -69,8 +67,6 @@ class VectorBoundsCapture : public BoundsCapture { public: explicit VectorBoundsCapture(Value v); explicit VectorBoundsCapture(VectorType t); - VectorBoundsCapture(const VectorBoundsCapture &) = default; - VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default; private: Value base; diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 801ead8..0eb46f7 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -108,8 +108,10 @@ public: private: /// Creates the loop nest on the "major" dimensions and calls the /// `loopBodyBuilder` lambda in the context of the loop nest. - template - void emitLoops(Lambda loopBodyBuilder); + void + emitLoops(llvm::function_ref + loopBodyBuilder); /// Common state to lower vector transfer ops. PatternRewriter &rewriter; @@ -129,10 +131,13 @@ private: VectorType minorVectorType; // vector<(minor_dims) x type> MemRefType memRefMinorVectorType; // memref> }; +} // namespace template -template -void NDTransferOpHelper::emitLoops(Lambda loopBodyBuilder) { +void NDTransferOpHelper::emitLoops( + llvm::function_ref + loopBodyBuilder) { /// Loop nest operates on the major dimensions MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); @@ -195,7 +200,7 @@ static Value emitInBoundsCondition(PatternRewriter &rewriter, VectorTransferOpInterface xferOp, unsigned leadingRank, ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, + const MemRefBoundsCapture &memrefBounds, SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); @@ -242,7 +247,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { /// Lambda to load 1-D vector in the current loop ivs + offset context. auto load1DVector = [&](ValueRange majorIvsPlusOffsets) -> Value { SmallVector indexing; @@ -341,7 +346,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { // Lower to 1-D vector_transfer_write and let recursion handle it. auto emitTransferWrite = [&](ValueRange majorIvsPlusOffsets) { SmallVector indexing; @@ -390,8 +395,6 @@ LogicalResult NDTransferOpHelper::doReplace() { return success(); } -} // namespace - /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of @@ -422,8 +425,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -namespace mlir { - template VectorTransferRewriter::VectorTransferRewriter( VectorTransferToSCFOptions options, MLIRContext *context) @@ -443,7 +444,7 @@ MemRefType VectorTransferRewriter::tmpMemRefType( static void emitWithBoundsChecks( PatternRewriter &rewriter, VectorTransferOpInterface transfer, - ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture, + ValueRange ivs, const MemRefBoundsCapture &memRefBoundsCapture, function_ref)> inBoundsFun, function_ref)> outOfBoundsFun = nullptr) { // Permute the incoming indices according to the permutation map. @@ -499,43 +500,13 @@ static void emitWithBoundsChecks( /// 1. local memory allocation; /// 2. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (with padding). /// 3. vector_load from local buffer (viewed as a memref<1 x vector>); /// 4. local memory deallocation. /// /// Lowers the data transfer part of a TransferReadOp while ensuring no /// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be read multiple -/// times and concurrently. -/// -/// Important notes about clipping and "full-tiles only" abstraction: -/// ================================================================= -/// When using clipping for dealing with boundary conditions, the same edge -/// value will appear multiple times (a.k.a edge padding). This is fine if the -/// subsequent vector operations are all data-parallel but **is generally -/// incorrect** in the presence of reductions or extract operations. -/// -/// More generally, clipping is a scalar abstraction that is expected to work -/// fine as a baseline for CPUs and GPUs but not for vector_load and DMAs. -/// To deal with real vector_load and DMAs, a "padded allocation + view" -/// abstraction with the ability to read out-of-memref-bounds (but still within -/// the allocated region) is necessary. -/// -/// Whether using scalar loops or vector_load/DMAs to perform the transfer, -/// junk values will be materialized in the vectors and generally need to be -/// filtered out and replaced by the "neutral element". This neutral element is -/// op-dependent so, in the future, we expect to create a vector filter and -/// apply it to a splatted constant vector with the proper neutral element at -/// each ssa-use. This filtering is not necessary for pure data-parallel -/// operations. -/// -/// In the case of vector_store/DMAs, Read-Modify-Write will be required, which -/// also have concurrency implications. Note that by using clipped scalar stores -/// in the presence of data-parallel only operations, we generate code that -/// writes the same value multiple time on the edge locations. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// padding. /// Performs the rewrite. template <> @@ -618,19 +589,11 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( /// 2. vector_store to local buffer (viewed as a memref<1 x vector>); /// 3. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (if in bounds). /// 4. local memory deallocation. /// /// More specifically, lowers the data transfer part while ensuring no -/// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be written to multiple -/// times and concurrently. -/// -/// See `Important notes about clipping and full-tiles only abstraction` in the -/// description of `readClipped` above. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// out-of-bounds accesses are possible. template <> LogicalResult VectorTransferRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -702,6 +665,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( return success(); } +namespace mlir { + void populateVectorToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *context, const VectorTransferToSCFOptions &options) { -- 2.7.4