From 44cfea0279a4fb9ea8cb0c68a2b5ee7a81654071 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 10 Oct 2022 02:05:14 -0700 Subject: [PATCH] [mlir][Linalg] Retire LinalgStrategyTilePass and filter-based pattern. Context: https://discourse.llvm.org/t/psa-retire-linalg-filter-based-patterns/63785 Uses of `LinalgTilingPattern::returningMatchAndRewrite` are replaced by a top-level `tileWithLinalgTilingOptions` function that is marked obsolete and serves as a temporary means to transition away from `LinalgTilingOptions`-based tiling. LinalgTilingOptions supports too many options that have been orthogonalized with the use of the transform dialect. Additionally, the revision introduces a `transform.structured.tile_to_scf_for` structured transform operation that is needed to properly tile `tensor.pad` via the TilingInterface. Uses of `transform.structured.tile` will be deprecated and replaced by this new op. This will achieve the deprecation of `linalg::tileLinalgOp`. Context: https://discourse.llvm.org/t/psa-retire-tileandfuselinalgops-method/63850 In the process of transitioning, tests that were performing tile and distribute on tensors are retired: transformations should be orthogonalized better in the future. In particular, tiling to specific loop types and tileAndDistribute behavior are not available via the transform ops. The behavior is still available as part of the `tileWithLinalgTilingOptions` method to allow downstream clients to transition without breakages but is meant to be retired soon. As more tests were ported to the transform dialect, it became necessary to introduce a test-transform-dialect-erase-schedule-pass to discard the transform specification once applied so that e2e lowering and execution is possible. Lastly, a number of redundant tests that were testing composition of patterns are retired as they are available with a better mechanism via the transform dialect. 
Differential Revision: https://reviews.llvm.org/D135573 --- mlir/include/mlir/Dialect/Linalg/Passes.h | 5 - mlir/include/mlir/Dialect/Linalg/Passes.td | 29 -- .../Linalg/TransformOps/LinalgTransformOps.td | 57 ++++ .../Dialect/Linalg/Transforms/CodegenStrategy.h | 33 -- .../mlir/Dialect/Linalg/Transforms/Transforms.h | 90 ++---- .../Linalg/TransformOps/LinalgTransformOps.cpp | 165 +++++++++- .../Linalg/Transforms/LinalgStrategyPasses.cpp | 42 --- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 69 ----- mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 24 +- mlir/test/Dialect/Linalg/tile-and-distribute.mlir | 219 -------------- .../test/Dialect/Linalg/tile-and-peel-tensors.mlir | 110 ------- mlir/test/Dialect/Linalg/tile-conv.mlir | 8 +- mlir/test/Dialect/Linalg/tile-indexed.mlir | 53 +--- mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir | 251 +++++++++------- mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir | 113 ------- mlir/test/Dialect/Linalg/tile-parallel.mlir | 68 ----- .../Linalg/tile-scalarize-dynamic-dims.mlir | 74 ----- mlir/test/Dialect/Linalg/tile-tensors.mlir | 19 +- mlir/test/Dialect/Linalg/tile-zero.mlir | 12 - mlir/test/Dialect/Linalg/tile.mlir | 331 --------------------- mlir/test/Dialect/Linalg/transform-patterns.mlir | 118 +++++--- .../Dialect/Linalg/CPU/test-conv-1d-call.mlir | 10 +- .../Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir | 10 +- .../Dialect/Linalg/CPU/test-conv-2d-call.mlir | 10 +- .../Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir | 10 +- .../Dialect/Linalg/CPU/test-conv-3d-call.mlir | 10 +- .../Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir | 9 +- .../Dialect/Linalg/CPU/test-tensor-matmul.mlir | 10 +- .../lib/Dialect/Linalg/TestLinalgTransforms.cpp | 303 +------------------ .../Transform/TestTransformDialectInterpreter.cpp | 29 ++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 31 files changed, 588 insertions(+), 1705 deletions(-) delete mode 100644 mlir/test/Dialect/Linalg/tile-and-distribute.mlir delete mode 100644 
mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir delete mode 100644 mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir delete mode 100644 mlir/test/Dialect/Linalg/tile-parallel.mlir delete mode 100644 mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir delete mode 100644 mlir/test/Dialect/Linalg/tile-zero.mlir delete mode 100644 mlir/test/Dialect/Linalg/tile.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index 6e41f05..40ca027 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -39,11 +39,6 @@ std::unique_ptr createFoldReshapeOpsByLinearizationPass(); std::unique_ptr createLinalgNamedOpConversionPass(); std::unique_ptr> -createLinalgTilingPass(ArrayRef tileSizes = {}, - linalg::LinalgTilingLoopType loopType = - linalg::LinalgTilingLoopType::Loops); - -std::unique_ptr> createLinalgInlineScalarOperandsPass(); /// Create a pass to convert Linalg operations to scf.for loops and diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index 40a2f11..73fd30b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -102,22 +102,6 @@ def LinalgBufferize : Pass<"linalg-bufferize", "func::FuncOp"> { ]; } -def LinalgTilingPass : Pass<"linalg-tile", "func::FuncOp"> { - let summary = "Tile operations in the linalg dialect"; - let constructor = "mlir::createLinalgTilingPass()"; - let dependentDialects = [ - "AffineDialect", - "linalg::LinalgDialect", - "memref::MemRefDialect", - "scf::SCFDialect" - ]; - let options = [ - ListOption<"tileSizes", "tile-sizes", "int64_t", "Tile sizes">, - Option<"loopType", "loop-type", "std::string", /*default=*/"\"for\"", - "Specify the type of loops to generate: for, parallel"> - ]; -} - def LinalgGeneralization : Pass<"linalg-generalize-named-ops", "func::FuncOp"> { let summary = "Convert named ops into generic ops"; let constructor 
= "mlir::createLinalgGeneralizationPass()"; @@ -162,19 +146,6 @@ def LinalgDetensorize : Pass<"linalg-detensorize", ""> { ]; } -def LinalgStrategyTilePass - : Pass<"linalg-strategy-tile-pass", "func::FuncOp"> { - let summary = "Configurable pass to apply pattern-based linalg tiling."; - let constructor = "mlir::createLinalgStrategyTilePass()"; - let dependentDialects = ["linalg::LinalgDialect"]; - let options = [ - Option<"anchorFuncName", "anchor-func", "std::string", /*default=*/"", - "Which func op is the anchor to latch on.">, - Option<"anchorOpName", "anchor-op", "std::string", /*default=*/"", - "Which linalg op within the func is the anchor to latch on.">, - ]; -} - def LinalgStrategyRemoveMarkersPass : Pass<"linalg-strategy-remove-markers-pass", "func::FuncOp"> { let summary = "Cleanup pass that drops markers."; diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 491c5a8..be4efaa 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -751,6 +751,63 @@ def TileToForeachThreadOp : }]; } +def TileToScfForOp : Op, + DeclareOpInterfaceMethods]> { + let description = [{ + Indicates that the given `target` op should be tiled with the given sizes. + This transform generates a loop nest with a smaller ("tiled") target + operation in its body. The target must implement TilingInterface. + + Tile sizes may be known at transformation time, in which case they are + expected to be provided in the `static_sizes` attribute, or not, in which + case the tile value must be computed by the payload IR and the handle to the + operation computing it must be provided through `dynamic_sizes`. When the + sizes are not known statically, the corresponding entry in the + `static_sizes` attribute must be set to `ShapedType::kDynamicSize`. 
Only + the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should + be as many handles as `ShapedType::kDynamicSize` values in the + `static_sizes` attribute. A static size of `0` indicates that the dimension + should not be tiled. No loop will be generated for such dimensions. If all + tile sizes are `0`, this transform is effectively a no-op. + + This op returns handles to the tiled op (in the generated loop nest) and the + generated loops. The number of loops is the number of tile sizes that are + statically known to be non-zero. + + #### Return modes + + On success, the resulting handles are associated with co-indexed lists of + tiled operations and loops around them. + + This operation only supports TilingInterface ops and produces a silenceable + failure if the input contains any non-TilingInterface ops. The ops preceding + it in the list associated with the `target` handle will have been tiled. + + This operation produces a silenceable failure if the `dynamic_sizes` handles + are associated with lists of payload operations of a size different than + that of the list associated with the `target` handle. + + If the internal implementation of tiling for any of the operations fails, + produces a definite failure. + }]; + + let arguments = (ins PDL_Operation:$target, + Variadic:$dynamic_sizes, + DefaultValuedAttr:$static_sizes, + DefaultValuedAttr:$interchange); + let results = (outs PDL_Operation:$tiled_linalg_op, + Variadic:$loops); + + let hasCustomAssemblyFormat = 1; + + let extraClassDeclaration = [{ + /// Returns the list of tile sizes, which may be static (Attribute) or + /// dynamic (Value). 
+ SmallVector getMixedSizes(); + }]; +} + def VectorizeOp : Op { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h index d7c0d22..ae3df32 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h @@ -30,41 +30,8 @@ struct Transformation { LinalgTransformationFilter::FilterFunction filter = nullptr; }; -/// Represent one application of LinalgStrategyTilePass. -struct Tile : public Transformation { - Tile(StringRef name, linalg::LinalgTilingOptions options, - LinalgTransformationFilter::FilterFunction f = nullptr) - : Transformation(std::move(f)), opName(name), - options(std::move(options)) {} - - void addToPassPipeline(OpPassManager &pm, - LinalgTransformationFilter m) const override { - pm.addPass(createLinalgStrategyTilePass(opName, options, m)); - } - -private: - std::string opName; - linalg::LinalgTilingOptions options; -}; - /// Codegen strategy controls how a Linalg op is progressively lowered. struct CodegenStrategy { - /// Append a pattern to add a level of tiling for Op `opName` with tiling - /// `options`. - CodegenStrategy & - tile(StringRef opName, const linalg::LinalgTilingOptions &options, - const LinalgTransformationFilter::FilterFunction &f = nullptr) { - transformationSequence.emplace_back( - std::make_unique(opName, options, f)); - return *this; - } - /// Conditionally append a pattern to add a level of tiling for - /// `LinalgOpType` with tiling `options`. - CodegenStrategy & - tileIf(bool b, StringRef opName, linalg::LinalgTilingOptions options, - LinalgTransformationFilter::FilterFunction f = nullptr) { - return b ? tile(opName, std::move(options), std::move(f)) : *this; - } /// Configure the post staged-patterns global enabling passes options. 
CodegenStrategy & setVectorTransferToSCFOptions(LinalgEnablingOptions options) { diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index fb37c6f..044ce8d 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -696,57 +696,26 @@ struct LinalgTilingOptions { RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx); void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns); -/// -/// Linalg tiling pattern. -/// -/// Apply the `tiling` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. -/// See `tiling` for more details. -// TODO: TiledOpInterface -struct LinalgTilingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. - LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// Construct a pattern specifically applied to `opName`. - LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f = LinalgTransformationFilter(), - PatternBenefit benefit = 1); - - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. - FailureOr - returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; - - LogicalResult matchAndRewrite(LinalgOp op, - PatternRewriter &rewriter) const override { - return returningMatchAndRewrite(op, rewriter); - } - -private: - /// LinalgTransformMarker handles special attribute manipulations. - LinalgTransformationFilter filter; - /// Options to control tiling; - LinalgTilingOptions options; -}; +/// Perform tiling using LinalgTilingOptions. 
+/// Note: this is on a path to deprecation that only works on LinalgOp. +/// Clients should favor using `tileUsingSCFForOp` that more generally works on +/// TilingInterface. +FailureOr +tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options); /// /// Linalg padding pattern. /// /// Apply the `padding` transformation as a pattern. -/// `filter` controls LinalgTransformMarker matching and update when specified. /// See `padding` for more details. struct LinalgPaddingPattern : public OpInterfaceRewritePattern { - /// Construct a generic pattern applied to all LinalgOp that verify `filter`. LinalgPaddingPattern(MLIRContext *context, LinalgPaddingOptions options = LinalgPaddingOptions(), PatternBenefit benefit = 1); - /// `matchAndRewrite` implementation that returns the significant transformed - /// pieces of IR. + /// `matchAndRewrite` implementation that returns the significant + /// transformed pieces of IR. FailureOr returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const; @@ -954,9 +923,9 @@ void populateLinalgNamedOpsGeneralizationPatterns( /// Linalg decompose convolutions patterns -/// Populates patterns to decompose high-D convolution ops into low-D ones. This -/// is a step in progressive lowering for convolution ops, afterwards we can -/// vectorize the low-D convolution ops. +/// Populates patterns to decompose high-D convolution ops into low-D ones. +/// This is a step in progressive lowering for convolution ops, afterwards we +/// can vectorize the low-D convolution ops. void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); @@ -977,8 +946,8 @@ struct PadOpTransformationPattern : public OpRewritePattern { /// a static bounding box. Use `paddingValues` and `packPaddings` to set padding /// value and nofold attribute of the created tensor::PadOps, respectively. 
/// Update `paddedOp` to the cloned operation with statically shaped -/// `paddingDimensions` and return the extracted dynamically shaped results. If -/// padding fails, return failure. +/// `paddingDimensions` and return the extracted dynamically shaped results. +/// If padding fails, return failure. FailureOr> rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad, ArrayRef paddingDimensions, @@ -1132,29 +1101,6 @@ public: const LinalgTransformationFilter &f) {} }; -template -class TilingPatterns; - -template <> -class TilingPatterns<> { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) {} -}; - -template -class TilingPatterns { -public: - static void insert(RewritePatternSet &patterns, - const LinalgTilingOptions &options, - const LinalgTransformationFilter &f) { - patterns.add(OpTy::getOperationName(), - patterns.getContext(), options, f); - TilingPatterns::insert(patterns, options, f); - } -}; - /// Split Reduction options. struct SplitReductionOptions { // Ratio used to split the reduction dimension. If the ratio is <= 1, nothing @@ -1181,8 +1127,10 @@ void populateSplitReductionPattern( /// Apply transformation to split the single linalg op reduction into a parallel /// and reduction dimension. Then create a new linalg.generic op doing the rest -/// of the reduction. Return the new linalg op with an extra parallel dimension -/// or failure if the transformation didn't happen. +/// of the reduction. +/// Return the new linalg op with an extra parallel dimension or failure if the +/// transformation didn't happen. 
+/// /// Example: /// ``` /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, @@ -1265,7 +1213,7 @@ splitReduction(PatternRewriter &b, LinalgOp op, /// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3], /// iterator_types = ["parallel", "parallel", "parallel", "reduction"]} /// ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>) -/// outs(%1 : tensor<16x32x64xf32>) { +/// outs(%1 : tensor<16x32x64xf32>) { /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32): /// %5 = arith.mulf %arg3, %arg4 : f32 /// %6 = arith.addf %arg6, %5 : f32 diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 5b82520..ed74de7 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -37,6 +37,16 @@ static SmallVector extractUIntArray(ArrayAttr attr) { return result; } +/// Extracts a vector of int64_t from an array attribute. Asserts if the +/// attribute contains values other than integers. +static SmallVector extractI64Array(ArrayAttr attr) { + SmallVector result; + result.reserve(attr.size()); + for (APInt value : attr.getAsValueRange()) + result.push_back(value.getSExtValue()); + return result; +} + namespace { /// A simple pattern rewriter that implements no special logic. class SimpleRewriter : public PatternRewriter { @@ -858,11 +868,10 @@ transform::ScalarizeOp::applyToOne(linalg::LinalgOp target, // Tiling with "scalarize_dyn_dims" actually sets the same lambda as the // tile sizes and asserts that it is not already set. 
SmallVector emptyTileSizes; - LinalgTilingPattern pattern(getContext(), tilingOptions); SimpleRewriter rewriter(getContext()); rewriter.setInsertionPoint(target); FailureOr result = - pattern.returningMatchAndRewrite(target, rewriter); + tileWithLinalgTilingOptions(rewriter, target, tilingOptions); if (failed(result)) return DiagnosedSilenceableFailure(reportUnknownTransformError(target)); @@ -1052,7 +1061,6 @@ transform::SplitReductionOp::applyToOne(linalg::LinalgOp target, DiagnosedSilenceableFailure transform::TileOp::apply(TransformResults &transformResults, TransformState &state) { - LinalgTilingOptions tilingOptions; SmallVector tileSizes = extractFromI64ArrayAttr(getStaticSizes()); ArrayRef targets = state.getPayloadOps(getTarget()); @@ -1097,6 +1105,7 @@ transform::TileOp::apply(TransformResults &transformResults, return diag; } + LinalgTilingOptions tilingOptions; unsigned index = en.index(); if (!tileSizes.empty()) { tilingOptions.setTileSizeComputationFunction( @@ -1118,10 +1127,9 @@ transform::TileOp::apply(TransformResults &transformResults, } tilingOptions.setInterchange(extractUIntArray(getInterchange())); - LinalgTilingPattern pattern(getContext(), tilingOptions); SimpleRewriter rewriter(linalgOp.getContext()); FailureOr tiledOp = - pattern.returningMatchAndRewrite(linalgOp, rewriter); + tileWithLinalgTilingOptions(rewriter, linalgOp, tilingOptions); if (failed(tiledOp)) return DiagnosedSilenceableFailure::definiteFailure(); @@ -1341,6 +1349,153 @@ LogicalResult TileToForeachThreadOp::verify() { } //===----------------------------------------------------------------------===// +// TileToScfForOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure +transform::TileToScfForOp::apply(TransformResults &transformResults, + TransformState &state) { + SmallVector tileSizes = extractFromI64ArrayAttr(getStaticSizes()); + + ArrayRef targets = state.getPayloadOps(getTarget()); + SmallVector> 
dynamicSizeProducers; + dynamicSizeProducers.reserve(getDynamicSizes().size()); + for (Value dynamicSizeProducerHandle : getDynamicSizes()) { + dynamicSizeProducers.push_back( + state.getPayloadOps(dynamicSizeProducerHandle)); + + if (dynamicSizeProducers.back().size() != targets.size()) { + DiagnosedSilenceableFailure diag = + emitSilenceableError() + << "expected as many dynamic size-producing operations (" + << dynamicSizeProducers.back().size() << ") as target ops (" + << targets.size() << ")"; + diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle"; + return diag; + } + + for (Operation *op : dynamicSizeProducers.back()) { + if (op->getNumResults() == 1 && + op->getResult(0).getType().isa()) + continue; + DiagnosedSilenceableFailure diag = + emitSilenceableError() << "expected sizes to be produced by ops " + "with a single index-type result"; + diag.attachNote(op->getLoc()) << "size producer op"; + diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle"; + return diag; + } + } + + SmallVector tiled; + SmallVector, 4> loops; + loops.resize(getLoops().size()); + for (auto &en : llvm::enumerate(targets)) { + auto tilingInterfaceOp = dyn_cast(en.value()); + if (!tilingInterfaceOp) { + DiagnosedSilenceableFailure diag = + emitSilenceableError() << "only TilingInterface ops are supported"; + diag.attachNote(en.value()->getLoc()) << "target op"; + return diag; + } + + scf::SCFTilingOptions tilingOptions; + unsigned index = en.index(); + if (!tileSizes.empty()) { + tilingOptions.setTileSizeComputationFunction( + [&, index](OpBuilder &b, Operation *) { + SmallVector sizes; + sizes.reserve(tileSizes.size()); + unsigned dynamicIdx = 0; + for (OpFoldResult ofr : getMixedSizes()) { + if (auto attr = ofr.dyn_cast()) { + sizes.push_back(b.create( + getLoc(), attr.cast().getInt())); + } else { + sizes.push_back( + dynamicSizeProducers[dynamicIdx++][index]->getResult(0)); + } + } + return sizes; + }); + } + + 
tilingOptions.setInterchange(extractI64Array(getInterchange())); + SimpleRewriter rewriter(tilingInterfaceOp.getContext()); + FailureOr tilingResult = + tileUsingSCFForOp(rewriter, tilingInterfaceOp, tilingOptions); + if (failed(tilingResult)) + return DiagnosedSilenceableFailure::definiteFailure(); + + rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements); + + tiled.push_back(tilingResult->tiledOp); + for (const auto &en2 : llvm::enumerate(tilingResult->loops)) + loops[en2.index()].push_back(en2.value()); + } + + transformResults.set(getTiledLinalgOp().cast(), tiled); + for (const auto &en : llvm::enumerate(loops)) + transformResults.set(getLoops()[en.index()].cast(), en.value()); + + return DiagnosedSilenceableFailure::success(); +} + +SmallVector transform::TileToScfForOp::getMixedSizes() { + ValueRange dynamic = getDynamicSizes(); + SmallVector tileSizes = extractFromI64ArrayAttr(getStaticSizes()); + SmallVector results; + results.reserve(tileSizes.size()); + unsigned dynamicPos = 0; + Builder builder(getContext()); + for (int64_t size : tileSizes) { + if (size == ShapedType::kDynamicSize) { + results.push_back(dynamic[dynamicPos++]); + } else { + results.push_back(builder.getIndexAttr(size)); + } + } + return results; +} + +ParseResult transform::TileToScfForOp::parse(OpAsmParser &parser, + OperationState &result) { + OpAsmParser::UnresolvedOperand target; + SmallVector dynamicSizes; + ArrayAttr staticSizes; + auto pdlOperationType = pdl::OperationType::get(parser.getContext()); + if (parser.parseOperand(target) || + parser.resolveOperand(target, pdlOperationType, result.operands) || + parseDynamicIndexList(parser, dynamicSizes, staticSizes, + ShapedType::kDynamicSize) || + parser.resolveOperands(dynamicSizes, pdlOperationType, result.operands) || + parser.parseOptionalAttrDict(result.attributes)) + return ParseResult::failure(); + + result.addAttribute(getStaticSizesAttrName(result.name), staticSizes); + size_t numExpectedLoops = + 
staticSizes.size() - llvm::count(extractFromI64ArrayAttr(staticSizes), 0); + result.addTypes(SmallVector(numExpectedLoops + 1, pdlOperationType)); + return success(); +} + +void TileToScfForOp::print(OpAsmPrinter &p) { + p << ' ' << getTarget(); + printDynamicIndexList(p, getOperation(), getDynamicSizes(), getStaticSizes(), + ShapedType::kDynamicSize); + p.printOptionalAttrDict((*this)->getAttrs(), {getStaticSizesAttrName()}); +} + +void transform::TileToScfForOp::getEffects( + SmallVectorImpl &effects) { + consumesHandle(getTarget(), effects); + onlyReadsHandle(getDynamicSizes(), effects); + producesHandle(getTiledLinalgOp(), effects); + producesHandle(getLoops(), effects); + modifiesPayload(effects); +} + +//===----------------------------------------------------------------------===// // VectorizeOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp index 162e74f..39a9c7f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp @@ -51,40 +51,6 @@ using namespace linalg; namespace { -/// Configurable pass to apply pattern-based linalg tiling. 
-struct LinalgStrategyTilePass - : public impl::LinalgStrategyTilePassBase { - - LinalgStrategyTilePass() = default; - - LinalgStrategyTilePass(StringRef opName, - mlir::linalg::LinalgTilingOptions opt, - LinalgTransformationFilter filt) - : options(std::move(opt)), filter(std::move(filt)) { - this->anchorOpName.setValue(opName.str()); - } - - void runOnOperation() override { - auto funcOp = getOperation(); - if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName) - return; - - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet tilingPattern(ctx); - if (!anchorOpName.empty()) - tilingPattern.add(anchorOpName, ctx, options, - filter); - else - tilingPattern.add(ctx, options, filter); - if (anchorOpName == tensor::PadOp::getOperationName()) - populatePadTensorTilingPatterns(tilingPattern, options); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); - } - - mlir::linalg::LinalgTilingOptions options; - LinalgTransformationFilter filter; -}; - /// Configurable pass to lower vector operations. struct LinalgStrategyRemoveMarkersPass : public impl::LinalgStrategyRemoveMarkersPassBase< @@ -101,14 +67,6 @@ struct LinalgStrategyRemoveMarkersPass }; } // namespace -/// Create a LinalgStrategyTilePass. -std::unique_ptr> -mlir::createLinalgStrategyTilePass(StringRef opName, - const LinalgTilingOptions &opt, - const LinalgTransformationFilter &filter) { - return std::make_unique(opName, opt, filter); -} - /// Create a LinalgStrategyRemoveMarkersPass. std::unique_ptr> mlir::createLinalgStrategyRemoveMarkersPass() { diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index d377906..c0ff3e0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -732,77 +732,8 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns( >::insert(patterns); } -/// Populate the given list with patterns that apply Linalg tiling. 
-static void insertTilingPatterns(RewritePatternSet &patterns, - const LinalgTilingOptions &options) { - auto *ctx = patterns.getContext(); - LinalgTransformationFilter f(ArrayRef{}, - StringAttr::get(ctx, "tiled")); - TilingPatterns::insert(patterns, options, f); - patterns.add(ctx, options); -} - void mlir::linalg::populatePadTensorTilingPatterns( RewritePatternSet &patterns, const LinalgTilingOptions &options) { auto *ctx = patterns.getContext(); patterns.add(ctx, options); } - -static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) { - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - patterns.add(patterns.getContext()); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); -} - -namespace { -struct LinalgTilingPass : public impl::LinalgTilingPassBase { - LinalgTilingPass() = default; - LinalgTilingPass(ArrayRef tileSizes, LinalgTilingLoopType loopType) { - this->tileSizes = tileSizes; - this->loopType = ""; - this->loopTypeEnum = loopType; - } - - void runOnOperation() override { - func::FuncOp funcOp = getOperation(); - LinalgTilingLoopType type = - llvm::StringSwitch(loopType) - .Case("for", LinalgTilingLoopType::Loops) - .Case("affine", LinalgTilingLoopType::AffineLoops) - .Case("parallel", LinalgTilingLoopType::ParallelLoops) - .Default(loopTypeEnum); - auto options = - LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(type); - MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet patterns(ctx); - insertTilingPatterns(patterns, options); - scf::populateSCFForLoopCanonicalizationPatterns(patterns); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); - (void)applyPatternsAndFoldGreedily( - funcOp, getLinalgTilingCanonicalizationPatterns(ctx)); - // Drop the marker. 
- funcOp.walk([](LinalgOp op) { - op->removeAttr(LinalgTransforms::kLinalgTransformMarker); - }); - - // Apply swap pattern after generating loop nest and running - // canonicalizations. - applyExtractSliceOfPadTensorSwapPattern(funcOp); - } - - LinalgTilingLoopType loopTypeEnum; -}; - -} // namespace - -std::unique_ptr> -mlir::createLinalgTilingPass(ArrayRef tileSizes, - linalg::LinalgTilingLoopType loopType) { - return std::make_unique(tileSizes, loopType); -} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 938b9e7..58923bc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -356,33 +356,13 @@ void mlir::linalg::peelTiledLinalgOp(RewriterBase &rewriter, TiledLinalgOp &res, } } -/// Linalg tiling pattern. -mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(std::move(f)), options(std::move(options)) {} - -mlir::linalg::LinalgTilingPattern::LinalgTilingPattern( - StringRef opName, MLIRContext *context, LinalgTilingOptions options, - LinalgTransformationFilter f, PatternBenefit benefit) - : OpInterfaceRewritePattern(context, benefit), - filter(f.addOpNameFilter(opName)), options(std::move(options)) {} - FailureOr -mlir::linalg::LinalgTilingPattern::returningMatchAndRewrite( - LinalgOp op, PatternRewriter &rewriter) const { - if (failed(filter.checkAndNotify(rewriter, op))) - return failure(); - +mlir::linalg::tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op, + const LinalgTilingOptions &options) { FailureOr res = tileLinalgOp(rewriter, op, options); if (failed(res)) return failure(); - // Clear filter to stop recursive pattern application. - // This must be done here to properly propagate to peeling branches. 
- filter.replaceLinalgTransformationFilter(rewriter, res->op); - // Peel the loops of the TiledLinalgOp. peelTiledLinalgOp(rewriter, *res, options.peeledLoops, options.loopType); diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir deleted file mode 100644 index 6178aa3..0000000 --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ /dev/null @@ -1,219 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file | FileCheck %s - -func.func @gemm1(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute1"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm1( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm2(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute2"} - ins(%a, %b: memref, memref) - outs(%c:memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm2( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: 
%[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDSY:.*]] = arith.cmpi slt, %[[ITERY]], %{{.*}} -// CHECK: %[[INBOUNDSX:.*]] = arith.cmpi slt, %[[ITERX]], %{{.*}} -// CHECK: %[[INBOUNDS:.*]] = arith.andi %[[INBOUNDSY]], %[[INBOUNDSX]] -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm3(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute3"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm3( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply 
#[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm4(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute4"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm4( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBX]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm5(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute5"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 
* 8)> -// CHECK: func @gemm5( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] -// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBY]], %{{.*}} -// CHECK: scf.if %[[INBOUNDS]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -func.func @gemm6(%a : memref, %b : memref, %c : memref) -{ - linalg.matmul {__internal_linalg_transform__ = "distribute6"} - ins(%a, %b: memref, memref) - outs(%c: memref) - return -} -// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)> -// CHECK: func @gemm6( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] -// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] -// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) -// CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[OFFSETX:.*]] = affine.apply 
#[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] -// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] -// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> -// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> -// CHECK: func @matmul_tensors( -// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor -// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor) -> tensor { -func.func @matmul_tensors( - %arg0: tensor, %arg1: tensor, %arg2: tensor) - -> tensor { -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y -// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y -// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x -// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]] -// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]] -// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor) { -// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]] -// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] -// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]] -// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor) { -// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor) { -// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor to tensor -// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : 
tensor to tensor -// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor -// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor) -// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor -// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor -// CHECK: scf.yield %[[TD]] : tensor -// CHECK: scf.yield %[[TD2]] : tensor -// CHECK: scf.yield %[[TD1]] : tensor - %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"} - ins(%arg0, %arg1: tensor, tensor) - outs(%arg2: tensor) - -> tensor - -// CHECK: return %[[TD0]] : tensor - return %0 : tensor -} - diff --git a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir deleted file mode 100644 index f8f102e..0000000 --- a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir +++ /dev/null @@ -1,110 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=0" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-0 - -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=1,2" -canonicalize | \ -// RUN: FileCheck %s -check-prefix=CHECK-PEEL-12 - -// CHECK-PEEL-0: func @matmul_static_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0-DAG: %[[c1280:.*]] = arith.constant 1280 : index -// CHECK-PEEL-0-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-0-DAG: %[[c1700:.*]] = arith.constant 1700 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1280]] step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step 
%[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<220x?xf32>, tensor) outs({{.*}} : tensor<220x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_static_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12-DAG: %[[c1500:.*]] = arith.constant 1500 : index -// CHECK-PEEL-12-DAG: %[[c1536:.*]] = arith.constant 1536 : index -// CHECK-PEEL-12-DAG: %[[c1600:.*]] = arith.constant 1600 : index -// CHECK-PEEL-12-DAG: %[[c1664:.*]] = arith.constant 1664 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1500]] step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1664]] step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1536]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<64x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>) - -> tensor<1500x1700xf32> { - %out = tensor.empty() : tensor<1500x1700xf32> - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - 
ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>) - outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32> - return %r : tensor<1500x1700xf32> -} - -// ----- - -// CHECK-PEEL-0: func @matmul_dynamic_tensor -// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor) outs({{.*}} : tensor<256x?xf32>) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: scf.for %{{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } -// CHECK-PEEL-0: } - -// CHECK-PEEL-12: func @matmul_dynamic_tensor -// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index -// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index -// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index -// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor<512x128xf32>) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) 
-// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: scf.for %{{.*}} { -// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} { -// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor, tensor) outs({{.*}} : tensor) -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -// CHECK-PEEL-12: } -func.func @matmul_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %d1 = tensor.dim %arg1, %c1 : tensor - %out = tensor.empty(%d0, %d1) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir index 028c93a..f8b1064 100644 --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)> // CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)> @@ -10,6 +10,12 @@ func.func @conv(%arg0 : memref, %arg1 : memref, %arg2 : memref return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [2, 3] +} + // CHECK: func @conv // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir index fdca6fb..d6e9c0e 100644 --- a/mlir/test/Dialect/Linalg/tile-indexed.mlir +++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir @@ -1,6 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=10,25" -split-input-file | 
FileCheck %s -check-prefix=TILE-10n25 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=25,0" -split-input-file | FileCheck %s -check-prefix=TILE-25n0 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,25" -split-input-file | FileCheck %s -check-prefix=TILE-0n25 +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -split-input-file | FileCheck %s -check-prefix=TILE-10n25 func.func @indexed_vector(%arg0: memref<50xindex>) { linalg.generic {indexing_maps = [affine_map<(i) -> (i)>], @@ -12,6 +10,13 @@ func.func @indexed_vector(%arg0: memref<50xindex>) { } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_vector // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index @@ -21,19 +26,6 @@ func.func @indexed_vector(%arg0: memref<50xindex>) { // TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) // TILE-10n25: linalg.yield %[[NEW_I]] : index -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_vector -// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]]) -// TILE-25n0: linalg.yield %[[NEW_I]] : index - -// TILE-0n25-LABEL: func @indexed_vector -// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step % -// TILE-0n25: linalg.generic - // ----- func.func @indexed_matrix(%arg0: memref<50x50xindex>) { @@ -48,6 +40,13 @@ func.func @indexed_matrix(%arg0: memref<50x50xindex>) { } return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, 
%loop:2 = transform.structured.tile %0 [10, 25] +} + // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> // TILE-10n25-LABEL: func @indexed_matrix // TILE-10n25-DAG: %[[C25:.*]] = arith.constant 25 : index @@ -61,25 +60,3 @@ func.func @indexed_matrix(%arg0: memref<50x50xindex>) { // TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) // TILE-10n25: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index // TILE-10n25: linalg.yield %[[SUM]] : index - -// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-25n0-LABEL: func @indexed_matrix -// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index -// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-25n0: linalg.generic -// TILE-25n0: %[[I:.*]] = linalg.index 0 : index -// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[L]]) -// TILE-25n0: %[[J:.*]] = linalg.index 1 : index -// TILE-25n0: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[J]] : index -// TILE-25n0: linalg.yield %[[SUM]] : index - -// TILE-0n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)> -// TILE-0n25-LABEL: func @indexed_matrix -// TILE-0n25: %[[C25:.*]] = arith.constant 25 : index -// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]] -// TILE-0n25: linalg.generic -// TILE-0n25: %[[I:.*]] = linalg.index 0 : index -// TILE-0n25: %[[J:.*]] = linalg.index 1 : index -// TILE-0n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]]) -// TILE-0n25: %[[SUM:.*]] = arith.addi %[[I]], %[[NEW_J]] : index -// TILE-0n25: linalg.yield %[[SUM]] : index diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir index 6295f91..74e8ebb 100644 --- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir +++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir @@ -1,53 +1,65 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE2 -// RUN: mlir-opt %s 
-linalg-tile="tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \ -// RUN: FileCheck %s -check-prefix=TILE1 -// This test only checks that tiling does not crash. -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -resolve-shaped-type-result-dims -cse -split-input-file - -// TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE2: func @dynamic_pad_tensor( -// TILE2-SAME: %[[IN:.*]]: tensor -// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE2-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE2: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] -// TILE2: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] -// TILE2: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE2: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] -// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] -// TILE2: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE2: %[[SWAP_RESULT:.*]] = scf.if -// TILE2: tensor.generate -// TILE2: else -// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] -// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: return %[[RESULT]] - -// TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> -// TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> -// TILE1: func @dynamic_pad_tensor( -// TILE1-SAME: %[[IN:.*]]: tensor -// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE1-DAG: %[[C1:.*]] = arith.constant 1 : index -// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] -// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] -// TILE1: %[[DIM_IN0:.*]] = tensor.dim 
%[[IN]], %[[C0]] -// TILE1: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] -// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE1: %[[SWAP_RESULT:.*]] = scf.if -// TILE1: tensor.generate -// TILE1: else -// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] -// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] -// TILE1: return %[[RESULT]] - -func.func @dynamic_pad_tensor(%input_tensor: tensor, +// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -cse -split-input-file + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK: func @dynamic_pad_tensor_3_4( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]] +// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_3_4(%input_tensor: 
tensor, + %pad_value: f32) -> tensor { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor to tensor + return %0 : tensor +} + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3] +} + +// ----- + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK: func @dynamic_pad_tensor_0_3( +// CHECK-SAME: %[[IN:.*]]: tensor +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]] +// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]] +// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]] +// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]] +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor, %pad_value: f32) -> tensor { %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): @@ -56,41 +68,64 @@ func.func @dynamic_pad_tensor(%input_tensor: tensor, return %0 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loop = 
transform.structured.tile_to_scf_for %0 [0, 3] +} + +// ----- + +// CHECK-LABEL: func @static_pad_tensor_3_4( +// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index +// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]] +// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] +// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] +// CHECK: return %[[RESULT]] + +func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>, + %pad_value: f32) -> tensor<15x16xf32> { + %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %pad_value : f32 + } : tensor<7x9xf32> to tensor<15x16xf32> + return %0 : tensor<15x16xf32> +} + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3] +} + // ----- -// TILE2-LABEL: func @static_pad_tensor( -// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32> -// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE2-DAG: %[[C15:.*]] = arith.constant 15 : index -// TILE2-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]] -// TILE2: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] 
iter_args(%[[INNER_OUT:.*]] = -// TILE2: %[[SWAP_RESULT:.*]] = scf.if -// TILE2: tensor.generate -// TILE2: else -// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]] -// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1] -// TILE2: return %[[RESULT]] - - -// TILE1-LABEL: func @static_pad_tensor( -// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32> -// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE1-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE1: %[[SWAP_RESULT:.*]] = scf.if -// TILE1: tensor.generate -// TILE1: else -// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1] -// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] -// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1] -// TILE1: return %[[RESULT]] - -func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, +// CHECK-LABEL: func @static_pad_tensor_0_3( +// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index +// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[SWAP_RESULT:.*]] = scf.if +// CHECK: tensor.generate +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1] +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}] +// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor to tensor<15x?xf32> +// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 
1] +// CHECK: return %[[RESULT]] + +func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>, %pad_value: f32) -> tensor<15x16xf32> { %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] { ^bb0(%arg1: index, %arg2: index): @@ -99,25 +134,35 @@ func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>, return %0 : tensor<15x16xf32> } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3] +} + // ----- -// TILE1-LABEL: func @static_pad_tile_evenly( -// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32> -// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE1-DAG: %[[C15:.*]] = arith.constant 15 : index -// TILE1: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] = -// TILE1: %[[R2:.*]] = scf.if -// TILE1: %[[GEN:.*]] = tensor.generate -// TILE1: scf.yield %[[GEN]] : tensor<14x3xf32> -// TILE1: else -// TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32> -// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}] -// TILE1: scf.yield %[[PAD]] : tensor<14x3xf32> -// TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32> -// TILE1: scf.yield %[[R3]] : tensor<14x15xf32> -// TILE1: return %[[RESULT]] : tensor<14x15xf32> -func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>, +// CHECK-LABEL: func @static_pad_tile_evenly_0_3( +// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index +// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] 
step %[[C3]] iter_args(%[[INNER_OUT:.*]] = +// CHECK: %[[R2:.*]] = scf.if +// CHECK: %[[GEN:.*]] = tensor.generate +// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor +// CHECK: scf.yield %[[cast_0]] : tensor +// CHECK: else +// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32> +// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}] +// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor +// CHECK: scf.yield %[[cast_1]] : tensor +// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor to tensor<14x3xf32> +// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32> +// CHECK: scf.yield %[[R3]] : tensor<14x15xf32> +// CHECK: return %[[RESULT]] : tensor<14x15xf32> + +func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>, %output_tensor: tensor<14x15xf32>, %pad_value: f32) -> tensor<14x15xf32> { %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] { @@ -126,3 +171,9 @@ func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>, } : tensor<7x9xf32> to tensor<14x15xf32> return %0 : tensor<14x15xf32> } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 + %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3] +} diff --git a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir deleted file mode 100644 index dcad7a0..0000000 --- a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir +++ /dev/null @@ -1,113 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4,8 loop-type=parallel" -split-input-file | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE1 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4 loop-type=parallel" 
-split-input-file | FileCheck %s -check-prefix=TILE2 - -func.func @gemm(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.matmul ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @gemm -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C2]], %[[C4]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG5]], %[[ARG4]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// TILE1-LABEL: func @gemm -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.parallel (%[[ARG3:.*]]) = -// TILE1-SAME: step (%[[C2]]) -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.matmul ins(%[[SV1]], %{{.*}} outs(%[[SV3]] - -// TILE2-LABEL: func @gemm -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C2]], %[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[0, %[[ARG4]]] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]] -// TILE2: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]] - -// ----- - -#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> -#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> -#map2 = affine_map<(d0, d1, d2) -> (d1)> -#accesses = [#map0, #map1, #map2] -#trait = { - args_in = 2 : i64, - args_out = 1 : i64, - iterator_types = ["reduction", "parallel", "reduction"], - 
indexing_maps = #accesses -} - -func.func @reduction(%arg0 : memref, - %arg1 : memref, - %arg2 : memref) -{ - linalg.generic #trait - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - %1 = arith.addf %0, %arg5 : f32 - linalg.yield %1 : f32 - } - return -} - -// CHECK-LABEL: func @reduction -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK: scf.for %[[ARG3:.*]] = -// CHECK-SAME: step %[[C2]] -// CHECK: scf.parallel (%[[ARG4:.*]]) = -// CHECK-SAME: step (%[[C4]]) -// CHECK: scf.for %[[ARG5:.*]] = -// CHECK-SAME: step %[[C8]] -// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]] -// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]] -// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// CHECK: linalg.generic -// CHECK-SAME: ins(%[[SV1]], %[[SV2]] -// CHECK-SAME: outs(%[[SV3]] - -// TILE1-LABEL: func @reduction -// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE1: scf.for %[[ARG3:.*]] = -// TILE1-SAME: step %[[C2]] -// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0, 0] -// TILE1: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE1-NOT: memref.subview -// TILE1: linalg.generic -// TILE1-SAME: ins(%[[SV1]], %[[SV2]] -// TILE1-SAME: outs(%{{.*}} - -// TILE2-LABEL: func @reduction -// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE2: scf.for %[[ARG3:.*]] = -// TILE2-SAME: step %[[C2]] -// TILE2: scf.parallel (%[[ARG4:.*]]) = -// TILE2-SAME: step (%[[C4]]) -// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0] -// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0] -// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]] -// TILE2: linalg.generic -// TILE2-SAME: ins(%[[SV1]], %[[SV2]] -// 
TILE2-SAME: outs(%[[SV3]] diff --git a/mlir/test/Dialect/Linalg/tile-parallel.mlir b/mlir/test/Dialect/Linalg/tile-parallel.mlir deleted file mode 100644 index cf346f9..0000000 --- a/mlir/test/Dialect/Linalg/tile-parallel.mlir +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4 loop-type=parallel" | FileCheck %s -check-prefix=TILE-234 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @sum(%lhs: memref>, - %rhs: memref>, - %sum: memref>) { - linalg.generic #pointwise_2d_trait - ins(%lhs, %rhs: memref>, - memref>) - outs(%sum : memref>) { - ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): - %result = arith.addf %lhs_in, %rhs_in : f32 - linalg.yield %result : f32 - } - return -} -// TILE-2-LABEL: func @sum( -// TILE-2-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-2-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-2-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-2: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) { -// TILE-2-NO: scf.parallel -// TILE-2: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-2: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-2: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-2: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-02-LABEL: func @sum( -// TILE-02-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// 
TILE-02-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-02-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-02: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) { -// TILE-02-NO: scf.parallel -// TILE-02: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-02: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-02: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-02: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] - -// TILE-002-LABEL: func @sum( -// TILE-002-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-002-NO: scf.parallel -// TILE-002: linalg.generic {{.*}} ins([[LHS]], [[RHS]]{{.*}} outs([[SUM]] - -// TILE-234-LABEL: func @sum( -// TILE-234-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) { -// TILE-234-DAG: [[C0:%.*]] = arith.constant 0 : index -// TILE-234-DAG: [[C2:%.*]] = arith.constant 2 : index -// TILE-234-DAG: [[C3:%.*]] = arith.constant 3 : index -// TILE-234: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0 -// TILE-234: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1 -// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) { -// TILE-234-NO: scf.parallel -// TILE-234: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]] -// TILE-234: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]] -// TILE-234: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]] -// TILE-234: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]] diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir deleted file mode 100644 index 9697adf..0000000 --- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir +++ /dev/null @@ -1,74 +0,0 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-scalarize-dynamic-dims" 
-scf-for-loop-canonicalization -canonicalize -split-input-file | \ -// RUN: FileCheck %s - -// CHECK-LABEL: func @matmul_partly_dynamic_tensor( -// CHECK-SAME: %[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK: tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB1:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor -// CHECK: %[[UB2:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor -// CHECK: scf.for %[[IV0:.*]] = %[[C0]] to %[[UB1]] step %[[C1]] -// CHECK: scf.for %[[IV1:.*]] = %[[C0]] to %[[UB2]] step %[[C1]] -// CHECK: %[[S1:.*]] = tensor.extract_slice %[[ARG0]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] : tensor to tensor<1x1xf32> -// CHECK: %[[S2:.*]] = tensor.extract_slice %[[ARG1]][%[[IV1]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: %[[S3:.*]] = tensor.extract_slice %{{.*}}[%[[IV0]], 0] [1, 2000] [1, 1] : tensor to tensor<1x2000xf32> -// CHECK: linalg.matmul ins(%[[S1]], %[[S2]] : tensor<1x1xf32>, tensor<1x2000xf32>) outs(%[[S3]] : tensor<1x2000xf32>) -> tensor<1x2000xf32> -func.func @matmul_partly_dynamic_tensor(%arg0: tensor, %arg1: tensor) - -> tensor { - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %d0 = tensor.dim %arg0, %c0 : tensor - %out = tensor.empty(%d0) : tensor - %r = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1: tensor, tensor) - outs(%out: tensor) -> tensor - return %r : tensor -} - -// ----- - -// The input IR of this test case is a tiled and peeled linalg.matmul op. 
- -// CHECK-LABEL: func @tiled_and_peeled_matmul( -// CHECK: linalg.matmul ins({{.*}} : tensor<32x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<32x258xf32>) -> tensor<32x258xf32> -// CHECK: linalg.matmul ins({{.*}} : tensor<1x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<1x258xf32>) -> tensor<1x258xf32> -#map0 = affine_map<(d0) -> (64, -d0 + 257)> -#map1 = affine_map<()[s0] -> ((s0 floordiv 32) * 32)> -#map2 = affine_map<(d0)[s0] -> (d0 - (s0 floordiv 32) * 32)> - -func.func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258xf32>, %arg2: tensor<257x258xf32>) -> tensor<257x258xf32> { - %c257 = arith.constant 257 : index - %c64 = arith.constant 64 : index - %cst = arith.constant 0.000000e+00 : f32 - %c0 = arith.constant 0 : index - %c32 = arith.constant 32 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<257x258xf32>) -> tensor<257x258xf32> - %1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) { - %2 = affine.min #map0(%arg3) - %3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor - %4 = tensor.extract_slice %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<257x258xf32> to tensor - %5 = affine.apply #map1()[%2] - %6 = scf.for %arg5 = %c0 to %5 step %c32 iter_args(%arg6 = %4) -> (tensor) { - %10 = tensor.extract_slice %3[%arg5, 0] [32, 259] [1, 1] : tensor to tensor<32x259xf32> - %11 = tensor.extract_slice %arg6[%arg5, 0] [32, 258] [1, 1] : tensor to tensor<32x258xf32> - %12 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%10, %arg1 : tensor<32x259xf32>, tensor<259x258xf32>) outs(%11 : tensor<32x258xf32>) -> tensor<32x258xf32> - %13 = tensor.insert_slice %12 into %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<32x258xf32> into tensor - scf.yield %13 : tensor - } - %7 = arith.cmpi slt, %5, %2 : index - %8 = scf.if %7 -> (tensor) { - %10 = affine.apply #map2(%2)[%2] - %11 = tensor.extract_slice %3[%5, 0] [%10, 259] [1, 1] : tensor to tensor - %12 
= tensor.extract_slice %6[%5, 0] [%10, 258] [1, 1] : tensor to tensor - %13 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%11, %arg1 : tensor, tensor<259x258xf32>) outs(%12 : tensor) -> tensor - %14 = tensor.insert_slice %13 into %6[%5, 0] [%10, 258] [1, 1] : tensor into tensor - scf.yield %14 : tensor - } else { - scf.yield %6 : tensor - } - %9 = tensor.insert_slice %8 into %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor into tensor<257x258xf32> - scf.yield %9 : tensor<257x258xf32> - } - return %1 : tensor<257x258xf32> -} diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir index 736a0e9..b87d728 100644 --- a/mlir/test/Dialect/Linalg/tile-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file | FileCheck %s // CHECK-LABEL: func @matmul_tensors( // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor @@ -27,6 +27,12 @@ func.func @matmul_tensors( return %0 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // ----- func.func @generic_op_tensors( @@ -52,6 +58,12 @@ func.func @generic_op_tensors( return %4 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 3, 4] +} + // CHECK-LABEL: func @generic_op_tensors // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor @@ -117,3 +129,8 @@ func.func @fold_extract_slice( return %2 : tensor } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %loops:3 = 
transform.structured.tile %0 [2, 3, 4] +} diff --git a/mlir/test/Dialect/Linalg/tile-zero.mlir b/mlir/test/Dialect/Linalg/tile-zero.mlir deleted file mode 100644 index 147b7c7..0000000 --- a/mlir/test/Dialect/Linalg/tile-zero.mlir +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: mlir-opt -test-linalg-transform-patterns=test-tile-pattern %s | FileCheck %s - -func.func @matmul_zero_tile( - %arg0: tensor, %arg1 : tensor, %arg2 : tensor) -> tensor { - %0 = linalg.matmul {__internal_linalg_transform__ = "tile"} - ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor - return %0 : tensor -} -// CHECK-LABEL: matmul_zero_tile -// CHECK: linalg.matmul -// CHECK-NOT: __internal_linalg_transform__ diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir deleted file mode 100644 index 0fc2ca6..0000000 --- a/mlir/test/Dialect/Linalg/tile.mlir +++ /dev/null @@ -1,331 +0,0 @@ -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-2 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-02 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-002 -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-234 - -// TILE-2-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-02-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-002-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)> -// TILE-234-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)> -// TILE-234-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)> - -func.func @matmul(%arg0: memref>, - %arg1: memref>, - %arg2: memref>) { - linalg.matmul - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) 
- return -} -// TILE-2-LABEL: func @matmul( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[N:.*]] = memref.dim %arg1, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]] -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]] -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref> to memref> -// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] - -// TILE-002-LABEL: func @matmul( -// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-002: %[[szK:.*]] = 
affine.min #[[$bound_map]](%[[K]])[%[[ubK]]] -// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]] -// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref> to memref> -// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref> to memref> -// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-234-LABEL: func @matmul( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE-234: %[[ubM:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-234: %[[ubN:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} { -// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]] -// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]] -// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]] -// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]] -// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]] -// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref> to memref> -// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref> to memref> -// -// TILE-234: linalg.matmul 
ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] - -// When the buffer shapes are known at compile time, it is possible to avoid -// the "min" in subview size computation. This test uses buffer sizes divisible -// by respective tile sizes (M=10 divisble by 2, N=12 divisible by 2 and 3, -// K=16 divisble by 2 and 4). -func.func @matmul_static(%arg0: memref<10x16xf32, strided<[?, 1], offset: ?>>, - %arg1: memref<16x12xf32, strided<[?, 1], offset: ?>>, - %arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) { - linalg.matmul - ins(%arg0, %arg1: memref<10x16xf32, strided<[?, 1], offset: ?>>, - memref<16x12xf32, strided<[?, 1], offset: ?>>) - outs(%arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) - return -} -// TILE-2-LABEL: func @matmul_static( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2-DAG: %[[M:.*]] = arith.constant 10 : index -// TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 16] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x16xf32, strided<[?, 1], offset: ?>> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 12] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-2: linalg.matmul ins(%[[sAi]], %{{.*}}{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matmul_static( -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02-DAG: %[[N:.*]] = arith.constant 12 : index -// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} { -// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [16, 2] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<16x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: 
%[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [10, 2] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]] - -// TILE-002-LABEL: func @matmul_static( -// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-002-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [10, 2] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>> -// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [2, 12] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>> -// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-234-LABEL: func @matmul_static( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index -// TILE-234-DAG: %[[C10:.*]] = arith.constant 10 : index -// TILE-234-DAG: %[[C16:.*]] = arith.constant 16 : index -// TILE-234-DAG: %[[C12:.*]] = arith.constant 12 : index -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} { -// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} { -// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [2, 4] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x4xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [4, 3] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<4x3xf32, strided<[?, 1], offset: ?>> -// TILE-234: %[[sCij:.*]] = memref.subview 
%{{.*}}[%[[I]], %[[J]]] [2, 3] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x3xf32, strided<[?, 1], offset: ?>> -// -// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]] - -func.func @matvec(%arg0: memref>, %arg1: memref>, %arg2: memref>) { - linalg.matvec - ins(%arg0, %arg1: memref>, - memref>) - outs(%arg2: memref>) - return -} -// TILE-2-LABEL: func @matvec( -// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref> to memref> -// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref> to memref> -// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]] - -// TILE-02-LABEL: func @matvec( -// TILE-02-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]] -// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-02: 
%[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}} - -// TILE-002-LABEL: func @matvec( -// TILE-002-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-002-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @matvec( -// TILE-234-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref -// TILE-234-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index -// TILE-234: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]] -// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref> to memref> -// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref> to memref> -// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// -// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]] - -func.func @dot(%arg0: memref>, %arg1: memref>, %arg2: memref) { - linalg.dot - ins(%arg0, %arg1: memref>, memref>) - outs(%arg2: memref) - return -} -// TILE-2-LABEL: func @dot( -// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, 
%c0 : memref> -// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} { -// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]] -// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -// TILE-02-LABEL: func @dot( -// TILE-02-NOT: scf.for - -// TILE-002-LABEL: func @dot( -// TILE-002-NOT: scf.for - -// TILE-234-LABEL: func @dot( -// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index -// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index -// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref> -// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} { -// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]] -// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref> to memref> -// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref> to memref> -// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs( - -func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<127x99xf32>) - return -} -// TILE-2-LABEL: func @fill_static -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: memref.subview{{.*}} : memref<127x99xf32> -// TILE-2: linalg.fill{{.*}} : memref> - -// TILE-02-LABEL: func @fill_static -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: memref.subview{{.*}} : memref<127x99xf32> -// TILE-02: linalg.fill{{.*}} : memref<127x?xf32, strided<[99, 1], offset: ?>> - -// TILE-002-LABEL: func @fill_static -// TILE-002-NOT: for -// TILE-002: linalg.fill{{.*}} : memref<127x99xf32> - -// TILE-234-LABEL: func @fill_static -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: 
for -// TILE-234: memref.subview{{.*}} : memref<127x99xf32> -// TILE-234: linalg.fill{{.*}} : memref> - - -func.func @fill(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) - return -} -// TILE-2-LABEL: func @fill -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: fill{{.*}} f32 - -// TILE-02-LABEL: func @fill -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: fill{{.*}} f32 - -// TILE-002-LABEL: func @fill -// TILE-002-NOT: for -// TILE-002: fill{{.*}} f32 - -// TILE-234-LABEL: func @fill -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: fill{{.*}} f32 - -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} - -func.func @pointwise(%arg0: memref>, %arg1: memref>, - %arg2: memref>) { - linalg.generic #pointwise_2d_trait - ins(%arg0, %arg1 : memref>, memref>) - outs(%arg2 : memref>) { - ^bb0(%arg4: f32, %arg5: f32, %arg6: f32): - %4 = arith.addf %arg4, %arg5 : f32 - linalg.yield %4 : f32 - } - return -} -// TILE-2-LABEL: func @pointwise -// TILE-2: for -// TILE-2-NOT: for -// TILE-2: linalg.generic - -// TILE-02-LABEL: func @pointwise -// TILE-02: for -// TILE-02-NOT: for -// TILE-02: linalg.generic - -// TILE-002-LABEL: func @pointwise -// TILE-002-NOT: for -// TILE-002: linalg.generic - -// TILE-234-LABEL: func @pointwise -// TILE-234: for -// TILE-234: for -// TILE-234-NOT: for -// TILE-234: linalg.generic diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir index 3502f99..ad3271c 100644 --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -1,20 +1,22 @@ -// RUN: mlir-opt %s -test-linalg-transform-patterns=test-patterns -split-input-file -test-transform-dialect-interpreter | FileCheck %s +// RUN: mlir-opt %s -test-transform-dialect-interpreter 
-test-linalg-transform-patterns=test-patterns -split-input-file | FileCheck %s -// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. -// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> -// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> -// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> +// ----- func.func @dot(%x: memref>, %y: memref>, %v: memref) { - linalg.dot { __internal_linalg_transform__ = "MEM" } - ins(%x, %y: memref>, - memref>) - outs(%v: memref) - + linalg.dot ins(%x, %y: memref>, + memref>) + outs(%v: memref) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.dot"]} in %arg1 + %1, %loop = transform.structured.tile %0 [8000] +} + // CHECK-LABEL: func @dot // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index @@ -28,6 +30,8 @@ func.func @dot(%x: memref>, // CHECK: arith.addf // CHECK: store +// ----- + func.func @matvec(%A: memref>, %x: memref>, %y: memref>) { @@ -37,25 +41,43 @@ func.func @matvec(%A: memref>, outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] +} + // CHECK-LABEL: func @matvec // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[c6:.*]] = arith.constant 6 : index -// CHECK: scf.parallel {{.*}} step (%[[c5]]) +// CHECK: scf.for {{.*}} step %[[c5]] // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul { __internal_linalg_transform__ = "MEM" } - ins(%A, %B: memref>, - memref>) - outs(%C: memref>) + linalg.matmul ins(%A, %B: 
memref>, + memref>) + outs(%C: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] + %4, %loops_4:3 = transform.structured.tile %3 [2, 3, 4] +} + // CHECK-LABEL: func @matmul // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index @@ -86,6 +108,13 @@ func.func @matmul(%A: memref>, // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + +// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1. +// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)> +// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)> +// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)> + #matmul_accesses = [ affine_map<(m, n, k) -> (m, k)>, affine_map<(m, n, k) -> (k, n)>, @@ -112,6 +141,7 @@ func.func @permute_generic(%A: memref>, } return } + transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): transform.sequence %arg0 failures(propagate) { @@ -120,6 +150,7 @@ transform.with_pdl_patterns { transform.structured.interchange %0 { iterator_interchange = [1, 2, 0]} } } + // CHECK-LABEL: func @permute_generic // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = [#[[$kn]], #[[$nm]], #[[$km]]], @@ -129,15 +160,23 @@ transform.with_pdl_patterns { // CHECK-SAME: memref> // CHECK-SAME: memref> +// ----- + func.func @matvec_perm(%A: memref>, %x: memref>, %y: memref>) { - linalg.matvec {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %x: memref>, - memref>) - outs(%y: memref>) + linalg.matvec ins(%A, %x: memref>, + memref>) + outs(%y: memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = 
transform.structured.match ops{["linalg.matvec"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [5, 6] {interchange = [1, 0]} +} + // CHECK-LABEL: func @matvec_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index @@ -148,15 +187,25 @@ func.func @matvec_perm(%A: memref>, // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) +// ----- + func.func @matmul_perm(%A: memref>, %B: memref>, %C: memref>) { - linalg.matmul {__internal_linalg_transform__ = "__with_perm__"} - ins(%A, %B: memref>, - memref>) - outs(%C : memref>) + linalg.matmul ins(%A, %B: memref>, + memref>) + outs(%C : memref>) return } + +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] {interchange=[1, 2, 0]} + %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] {interchange=[1, 0, 2]} + %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40] +} + // CHECK-LABEL: func @matmul_perm // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[c20:.*]] = arith.constant 20 : index @@ -180,26 +229,3 @@ func.func @matmul_perm(%A: memref>, // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) // CHECK: outs({{.*}}: memref>) - -func.func @tile_permute_parallel_loop(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.matmul {__internal_linalg_transform__ = "par__with_perm__"} - ins(%arg0, %arg1: memref, memref) - outs(%arg2: memref) - return -} -// CHECK-LABEL: func @tile_permute_parallel_loop -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref -// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 
-// CHECK-DAG: %[[D0:.*]] = memref.dim %[[ARG0]], %c0 -// CHECK-DAG: %[[D1:.*]] = memref.dim %[[ARG0]], %c1 -// CHECK-DAG: %[[D2:.*]] = memref.dim %[[ARG1]], %c1 -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]]) -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]] -// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]]) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir index 10c9adb..d79b402 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ func.func @conv_1d(%arg0: memref, %arg1: memref, %arg2: memref, %arg1: memref, % return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = 
transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 4] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir index 21d2a19..78175ba 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -24,6 +24,12 @@ func.func @conv_2d(%arg0: memref, %arg1: memref, %arg2: memref return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1 + %1, %loops:2 = transform.structured.tile %0 [2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 
: index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir index 51708ea..b675f87 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,12 @@ func.func @conv_2d_nhwc_hwcf(%arg0: memref, %arg1: memref, %arg1: memref, %arg2: me return } +transform.sequence failures(propagate) { + ^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1 + %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] +} + func.func @main() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir 
b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir index a4a51b8..d7245d3 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ +// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ // RUN: | FileCheck %s -// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-cf \ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \ // RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ @@ -26,6 +26,11 @@ func.func @conv_3d_ndhwc_dhwcf(%arg0: memref, %arg1: memref) diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 781936f..ad27637 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -61,10 +61,6 @@ struct TestLinalgTransforms Option testPatterns{*this, "test-patterns", llvm::cl::desc("Test a mixed set of patterns"), llvm::cl::init(false)}; - Option testTileAndDistributionOptions{ - *this, "test-tile-and-distribute-options", - 
llvm::cl::desc("Test tile and distribute options"), - llvm::cl::init(false)}; Option testVectorTransferForwardingPatterns{ *this, "test-vector-transfer-forwarding-patterns", llvm::cl::desc( @@ -75,13 +71,6 @@ struct TestLinalgTransforms llvm::cl::desc("Test a set of patterns that rewrite a linalg contraction " "in vector.contract form"), llvm::cl::init(false)}; - Option testTilePattern{*this, "test-tile-pattern", - llvm::cl::desc("Test tile pattern"), - llvm::cl::init(false)}; - Option testTileScalarizeDynamicDims{ - *this, "test-tile-scalarize-dynamic-dims", - llvm::cl::desc("Test tiling of dynamic dims by 1"), - llvm::cl::init(false)}; Option testTransformPadTensor{ *this, "test-transform-pad-tensor", llvm::cl::desc("Test transform pad tensor by copying with generic ops"), @@ -136,90 +125,11 @@ static void applyPatterns(func::FuncOp funcOp) { RewritePatternSet patterns(ctx); //===--------------------------------------------------------------------===// - // Linalg tiling patterns. - //===--------------------------------------------------------------------===// - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({2000, 3000, 4000}), - LinalgTransformationFilter(StringAttr::get(ctx, "MEM"), - StringAttr::get(ctx, "L3"))); - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({200, 300, 400}), - LinalgTransformationFilter(StringAttr::get(ctx, "L3"), - StringAttr::get(ctx, "L2"))); - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({20, 30, 40}), - LinalgTransformationFilter(StringAttr::get(ctx, "L2"), - StringAttr::get(ctx, "L1"))); - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({2, 3, 4}), - LinalgTransformationFilter(StringAttr::get(ctx, "L1"), - StringAttr::get(ctx, "REG"))); - - patterns.add( - MatvecOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({5, 6}).setLoopType( - 
LinalgTilingLoopType::ParallelLoops), - LinalgTransformationFilter(ArrayRef{}, - StringAttr::get(ctx, "L1"))); - - patterns.add( - DotOp::getOperationName(), ctx, LinalgTilingOptions().setTileSizes(8000), - LinalgTransformationFilter( - ArrayRef{StringAttr::get(ctx, "MEM"), - StringAttr::get(ctx, "L3"), - StringAttr::get(ctx, "L2")}, - StringAttr::get(ctx, "REG"))); - - //===--------------------------------------------------------------------===// - // Linalg tiling and permutation patterns. - //===--------------------------------------------------------------------===// - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions() - .setTileSizes({2000, 3000, 4000}) - .setInterchange({1, 2, 0}), - LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"), - StringAttr::get(ctx, "L2__with_perm__"))); - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions() - .setTileSizes({200, 300, 400}) - .setInterchange({1, 0, 2}), - LinalgTransformationFilter(StringAttr::get(ctx, "L2__with_perm__"), - StringAttr::get(ctx, "L1__with_perm__"))); - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({20, 30, 40}), - LinalgTransformationFilter(StringAttr::get(ctx, "L1__with_perm__"), - StringAttr::get(ctx, "REG__with_perm__"))); - - patterns.add( - MatvecOp::getOperationName(), ctx, - LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}), - LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"), - StringAttr::get(ctx, "L1__with_perm__"))); - - patterns.add( - MatmulOp::getOperationName(), ctx, - LinalgTilingOptions() - .setTileSizes({16, 8, 4}) - .setInterchange({1, 2, 0}) - .setLoopType(LinalgTilingLoopType::ParallelLoops), - LinalgTransformationFilter( - StringAttr::get(ctx, "par__with_perm__"), - StringAttr::get(ctx, "after_par__with_perm__"))); - - //===--------------------------------------------------------------------===// // Linalg to loops patterns. 
//===--------------------------------------------------------------------===// patterns.add>( ctx, - /*loweringType=*/LinalgLoweringType::Loops, - LinalgTransformationFilter(StringAttr::get(ctx, "REG"))); + /*loweringType=*/LinalgLoweringType::Loops); //===--------------------------------------------------------------------===// // Linalg distribution patterns. @@ -239,178 +149,6 @@ static void applyPatterns(func::FuncOp funcOp) { }); } -template -static SmallVector -getGpuProcIds(OpBuilder &b, Location loc, ArrayRef parallelLoopRanges, - ArrayRef distributionMethod) { - size_t count = std::min(3, parallelLoopRanges.size()); - SmallVector procInfo(count); - Type indexType = b.getIndexType(); - for (unsigned i = 0; i < count; ++i) { - gpu::Dimension dim = *gpu::symbolizeDimension(i); - procInfo[count - 1 - i] = {b.create(loc, indexType, dim), - b.create(loc, indexType, dim), - distributionMethod[count - 1 - i]}; - } - return procInfo; -} - -static void fillTileAndDistributePatterns(MLIRContext *context, - RewritePatternSet &patterns) { - { - LinalgLoopDistributionOptions cyclicNprocsEqNiters; - SmallVector distributionMethod = { - DistributionMethod::CyclicNumProcsEqNumIters, - DistributionMethod::CyclicNumProcsEqNumIters}; - cyclicNprocsEqNiters.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsEqNiters), - LinalgTransformationFilter( - StringAttr::get(context, "distribute1"), - StringAttr::get(context, "after_distribute1"))); - } - - { - LinalgLoopDistributionOptions cyclicNprocsGeNiters; - SmallVector distributionMethod = { - DistributionMethod::CyclicNumProcsGeNumIters, - DistributionMethod::CyclicNumProcsGeNumIters}; - 
cyclicNprocsGeNiters.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsGeNiters), - LinalgTransformationFilter( - StringAttr::get(context, "distribute2"), - StringAttr::get(context, "after_distribute2"))); - } - - { - LinalgLoopDistributionOptions cyclicNprocsDefault; - SmallVector distributionMethod = { - DistributionMethod::Cyclic, DistributionMethod::Cyclic}; - cyclicNprocsDefault.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsDefault), - LinalgTransformationFilter( - StringAttr::get(context, "distribute3"), - StringAttr::get(context, "after_distribute3"))); - } - - { - LinalgLoopDistributionOptions cyclicNprocsMixed1; - SmallVector distributionMethod = { - DistributionMethod::CyclicNumProcsEqNumIters, - DistributionMethod::CyclicNumProcsGeNumIters}; - cyclicNprocsMixed1.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsMixed1), - LinalgTransformationFilter( - StringAttr::get(context, "distribute4"), - StringAttr::get(context, "after_distribute4"))); - } - - { - LinalgLoopDistributionOptions 
cyclicNprocsMixed2; - SmallVector distributionMethod = { - DistributionMethod::CyclicNumProcsGeNumIters, - DistributionMethod::Cyclic}; - cyclicNprocsMixed2.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsMixed2), - LinalgTransformationFilter( - StringAttr::get(context, "distribute5"), - StringAttr::get(context, "after_distribute5"))); - } - - { - LinalgLoopDistributionOptions cyclicNprocsMixed3; - SmallVector distributionMethod = { - DistributionMethod::Cyclic, - DistributionMethod::CyclicNumProcsEqNumIters}; - cyclicNprocsMixed3.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::ParallelLoops) - .setDistributionOptions(cyclicNprocsMixed3), - LinalgTransformationFilter( - StringAttr::get(context, "distribute6"), - StringAttr::get(context, "after_distribute6"))); - } - - { - LinalgLoopDistributionOptions cyclicNprocsEqNiters; - SmallVector distributionMethod = { - DistributionMethod::Cyclic, DistributionMethod::Cyclic}; - cyclicNprocsEqNiters.procInfo = - [distributionMethod](OpBuilder &b, Location loc, - ArrayRef parallelLoopRanges) { - return getGpuProcIds( - b, loc, parallelLoopRanges, distributionMethod); - }; - patterns.add( - MatmulOp::getOperationName(), context, - LinalgTilingOptions() - .setTileSizes({8, 8, 4}) - .setLoopType(LinalgTilingLoopType::Loops) - .setDistributionOptions(cyclicNprocsEqNiters), - LinalgTransformationFilter( - StringAttr::get(context, 
"tensors_distribute1"), - StringAttr::get(context, "tensors_after_distribute1"))); - } -} - static void applyVectorTransferForwardingPatterns(func::FuncOp funcOp) { RewritePatternSet forwardPattern(funcOp.getContext()); forwardPattern.add(funcOp.getContext()); @@ -445,33 +183,6 @@ static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) { (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } -static void applyTilePattern(func::FuncOp funcOp, const std::string &loopType, - ArrayRef tileSizes, - ArrayRef peeledLoops, - bool scalarizeDynamicDims) { - MLIRContext *context = funcOp.getContext(); - RewritePatternSet tilingPattern(context); - LinalgTilingLoopType type = - llvm::StringSwitch(loopType) - .Case("for", LinalgTilingLoopType::Loops) - .Case("affine", LinalgTilingLoopType::AffineLoops) - .Case("parallel", LinalgTilingLoopType::ParallelLoops); - auto linalgTilingOptions = linalg::LinalgTilingOptions() - .setPeeledLoops(peeledLoops) - .setLoopType(type); - if (scalarizeDynamicDims) { - linalgTilingOptions.scalarizeDynamicDims(); - assert(tileSizes.empty() && - "tileSizes and scalarizeDynamicDims is mutually exclusive"); - } else { - linalgTilingOptions.setTileSizes(tileSizes); - } - linalg::LinalgTransformationFilter f(StringAttr::get(context, "tile")); - TilingPatterns::insert( - tilingPattern, linalgTilingOptions, f); - (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern)); -} - static void applySplitReduction(func::FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); linalg::populateSplitReductionPattern( @@ -521,12 +232,6 @@ void TestLinalgTransforms::runOnOperation() { }; std::unique_ptr cleanupGuard{(void *)1, lambda}; - if (testTileAndDistributionOptions) { - RewritePatternSet patterns(&getContext()); - fillTileAndDistributePatterns(&getContext(), patterns); - (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); - return; - } if (testPatterns) return applyPatterns(getOperation()); 
if (testVectorTransferForwardingPatterns) @@ -539,12 +244,6 @@ void TestLinalgTransforms::runOnOperation() { return applyGeneralizePadTensorPatterns(getOperation()); if (testSwapSubTensorPadTensor) return applyExtractSliceOfPadTensorSwapPattern(getOperation()); - if (testTilePattern) - return applyTilePattern(getOperation(), loopType, tileSizes, peeledLoops, - /*scalarizeDynamicDims=*/false); - if (testTileScalarizeDynamicDims) - return applyTilePattern(getOperation(), loopType, tileSizes, - /*peeledLoops=*/{}, /*scalarizeDynamicDims=*/true); if (testSplitReduction) return applySplitReduction(getOperation()); if (testSplitReductionInnerParallel) diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp index e74be0d..ad5dcab 100644 --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp +++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp @@ -57,10 +57,39 @@ public: llvm::cl::desc("perform expensive checks to better report errors in the " "transform IR")}; }; + +struct TestTransformDialectEraseSchedulePass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( + TestTransformDialectEraseSchedulePass) + + StringRef getArgument() const final { + return "test-transform-dialect-erase-schedule"; + } + + StringRef getDescription() const final { + return "erase transform dialect schedule from the IR"; + } + + void runOnOperation() override { + getOperation()->walk([&](Operation *nestedOp) { + if (isa(nestedOp)) { + nestedOp->erase(); + return WalkResult::skip(); + } + return WalkResult::advance(); + }); + } +}; } // namespace namespace mlir { namespace test { +/// Registers the test pass for erasing transform dialect ops. +void registerTestTransformDialectEraseSchedulePass() { + PassRegistration reg; +} /// Registers the test pass for applying transform dialect ops. 
void registerTestTransformDialectInterpreterPass() { PassRegistration reg; diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 37d331b..9eb0a47 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -114,6 +114,7 @@ void registerTestSliceAnalysisPass(); void registerTestTensorTransforms(); void registerTestTilingInterface(); void registerTestTopologicalSortAnalysisPass(); +void registerTestTransformDialectEraseSchedulePass(); void registerTestTransformDialectInterpreterPass(); void registerTestVectorLowerings(); void registerTestNvgpuLowerings(); @@ -214,6 +215,7 @@ void registerTestPasses() { mlir::test::registerTestTensorTransforms(); mlir::test::registerTestTilingInterface(); mlir::test::registerTestTopologicalSortAnalysisPass(); + mlir::test::registerTestTransformDialectEraseSchedulePass(); mlir::test::registerTestTransformDialectInterpreterPass(); mlir::test::registerTestVectorLowerings(); mlir::test::registerTestNvgpuLowerings(); -- 2.7.4