namespace mlir {
class AffineForOp;
+class ConversionTarget;
+struct LogicalResult;
class MLIRContext;
class OwningRewritePatternList;
-struct LogicalResult;
class Value;
namespace scf {
void populateParallelLoopToGPUPatterns(OwningRewritePatternList &patterns,
MLIRContext *ctx);
+/// Configures the rewrite target such that an `scf.parallel` operation is legal
+/// only if it does not carry a GPU mapping attribute; every `scf.parallel`
+/// operation that has such an attribute is treated as illegal.
+void configureParallelLoopToGPULegality(ConversionTarget &target);
+
} // namespace mlir
#endif // MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_
if (!boundIsPrecise) {
upperBound = deriveStaticUpperBound(upperBound, rewriter);
if (!upperBound) {
- return parallelOp.emitOpError()
- << "cannot derive loop-invariant upper bound for number "
- "of iterations";
+ return rewriter.notifyMatchFailure(
+ parallelOp,
+ "cannot derive loop-invariant upper bound for number of"
+ "iterations");
}
}
// Compute the number of iterations needed. We compute this as an
// todo(herhut,ravishankarm): Update the behavior of setMappingAttr
// when this condition is relaxed.
if (bounds.find(processor) != bounds.end()) {
- return parallelOp.emitOpError()
- << "cannot redefine the bound for processor "
- << static_cast<int64_t>(processor);
+ return rewriter.notifyMatchFailure(
+ parallelOp, "cannot redefine the bound for processor " +
+ Twine(static_cast<int64_t>(processor)));
}
bounds[processor] = launchBound;
}
LogicalResult
ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
PatternRewriter &rewriter) const {
+ // We can only transform starting at the outer-most loop. Launches inside of
+ // parallel loops are not supported.
+ if (auto parentLoop = parallelOp.getParentOfType<ParallelOp>())
+ return failure();
// Create a launch operation. We start with bound one for all grid/block
// sizes. Those will be refined later as we discover them from mappings.
Location loc = parallelOp.getLoc();
MLIRContext *ctx) {
patterns.insert<ParallelToGpuLaunchLowering>(ctx);
}
+
+void mlir::configureParallelLoopToGPULegality(ConversionTarget &target) {
+  // An `scf.parallel` op is legal exactly when it has no attribute under the
+  // GPU mapping attribute name; ops that do have one are reported as illegal.
+  auto lacksMappingAttr = [](scf::ParallelOp loop) {
+    return !loop.getAttr(gpu::getMappingAttrName());
+  };
+  target.addDynamicallyLegalOp<scf::ParallelOp>(lacksMappingAttr);
+}
target.addLegalDialect<AffineDialect>();
target.addLegalDialect<gpu::GPUDialect>();
target.addLegalDialect<scf::SCFDialect>();
- target.addIllegalOp<scf::ParallelOp>();
+ configureParallelLoopToGPULegality(target);
if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
signalPassFailure();
// -----
-// Mapping to the same processor twice.
+// Mapping to the same processor twice. Cannot be mapped.
func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3 : index,
%buf : memref<?x?xf32>,
%res : memref<?x?xf32>) {
%four = constant 4 : index
- // expected-error@+2 {{cannot redefine the bound for processor 1}}
- // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
step (%four, %four) {
} { mapping = [
return
}
+// CHECK-LABEL: @parallel_double_map
+// CHECK: scf.parallel
+
// -----
-// Loop with loop-variant upper bound.
+// Loop with loop-variant upper bound. Cannot be mapped.
func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3 : index,
%zero = constant 0 : index
%one = constant 1 : index
%four = constant 4 : index
- // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
step (%four, %four) {
- // expected-error@+1 {{cannot derive loop-invariant upper bound}}
scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
step (%one, %one) {
%idx0 = addi %i0, %si0 : index
] }
return
}
+
+// CHECK-LABEL: @parallel_loop_loop_variant_bound
+// CHECK: scf.parallel
+// CHECK: scf.parallel
+
+// -----
+
+// Loop without annotations. Cannot be mapped.
+
+func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3 : index,
+ %buf : memref<?x?xf32>,
+ %res : memref<?x?xf32>) {
+ %four = constant 4 : index
+ scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+ step (%four, %four) {
+ }
+ return
+}
+
+// CHECK-LABEL: @parallel_no_annotations
+// CHECK: scf.parallel