#### Return modes
- This 3 returned handles point to:
+ These 4 returned handles point to:
+ - the parent `for` op,
- the fill op used to initialize the neutral element,
- the parallel tiled op and
- the result-combining op.
let arguments = (ins PDL_Operation:$target,
DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_sizes);
- let results = (outs PDL_Operation:$fill_op,
+ let results = (outs PDL_Operation:$for_op,
+ PDL_Operation:$fill_op,
PDL_Operation:$split_linalg_op,
PDL_Operation:$combining_linalg_op);
#### Return modes
- This 3 returned handles point to:
+ These 4 returned handles point to:
+ - the parent foreach_thread op,
- the fill op used to initialize the neutral element,
- the parallel tiled op and
- the result-combining op.
DefaultValuedAttr<I64ArrayAttr, "{}">:$num_threads,
DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_sizes,
OptionalAttr<DeviceMappingArrayAttr>:$mapping);
- let results = (outs PDL_Operation:$fill_op,
+ let results = (outs PDL_Operation:$foreach_thread_op,
+ PDL_Operation:$fill_op,
PDL_Operation:$split_linalg_op,
PDL_Operation:$combining_linalg_op);
}
//===----------------------------------------------------------------------===//
-// SplitReductionOp
+// TileReductionUsingScfOp
//===----------------------------------------------------------------------===//
DiagnosedSilenceableFailure transform::TileReductionUsingScfOp::applyToOne(
if (failed(result))
return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
+ results.push_back(result->loops.front());
results.push_back(result->initialOp);
results.push_back(result->parallelTiledOp);
results.push_back(result->mergeOp);
diag << "could not tile reduction in target.";
return DiagnosedSilenceableFailure::silenceableFailure(std::move(diag));
}
+ results.push_back(result->loops);
results.push_back(result->initialOp);
results.push_back(result->parallelTiledOp);
results.push_back(result->mergeOp);
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 5] }
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 5] }
}
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [5, 0] }
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [5, 0] }
}
// CHECK: func @reduction_tile_transpose
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0 { num_threads = [0, 5] }
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0 { num_threads = [0, 5] }
}
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-(d0 * (s0 ceildiv 5)) + s0, s0 ceildiv 5)>
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0 { num_threads = [0, 0, 5] }
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0 { num_threads = [0, 0, 5] }
}
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-(d0 * (s0 ceildiv 5)) + s0, s0 ceildiv 5)>
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0
{ num_threads = [0, 5], tile_sizes = [0, 3], mapping = [#gpu.thread<x>] }
}
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1
- %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0
+ %loop, %1, %2, %3 = transform.structured.tile_reduction_using_foreach_thread %0
{ num_threads = [0, 5], tile_sizes = [0, 3], mapping = [#gpu.thread<x>] }
// CHECK: expecting fill