From 340314c4dcc801d8f493c45cafd79c79c6e8e58e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 13 Sep 2021 12:08:54 -0700 Subject: [PATCH] Reorder mmt4d shapes: * Revert https://reviews.llvm.org/D107307 so that both LHS and RHS have the same layout with K0 as the innermost dimension. * Continuing from https://reviews.llvm.org/D107003, also move 'K' to the outer side, so that now the inter-tile dimensions are all outer, and the intra-tile dimensions are all inner. Reviewed By: asaadaldien Differential Revision: https://reviews.llvm.org/D109692 --- .../mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml | 14 +++++++------- .../mlir/dialects/linalg/opdsl/ops/core_named_ops.py | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index ec71aa5..70c4a3c 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -181,7 +181,7 @@ structured_op: !LinalgStructuredOpConfig name: rhs usage: InputOperand type_var: RhsType - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s3, s5)> + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4, s1, s5, s3)> - !LinalgOperandDefConfig name: accum usage: OutputOperand @@ -189,19 +189,19 @@ structured_op: !LinalgStructuredOpConfig shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s4, s2, s5)> indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d4, d2, + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d2, d3, d5)> - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d4, d5, - d3)> - - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2, - d3)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d1, d2, d4, 
d5)> + - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d3, + d4)> iterator_types: - parallel - parallel + - reduction - parallel - parallel - reduction - - reduction assignments: - !ScalarAssign arg: accum diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py index 38db294..fc37a2e 100644 --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -39,7 +39,7 @@ def quantized_matmul( @linalg_structured_op def mmt4d(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0), - rhs=TensorDef(TV.RhsType, S.N, S.K, S.K0, S.N0), + rhs=TensorDef(TV.RhsType, S.N, S.K, S.N0, S.K0), accum=TensorDef(TV.AccumType, S.M, S.N, S.M0, S.N0, output=True)): """Performs a matrix-matrix-transpose multiplication of two 4D inputs. @@ -52,9 +52,9 @@ def mmt4d(lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0), '0' suffixes below, for instance the LHS matrix shape (M, K, M0, K0) reads as: MxK tiles, each of shape M0xK0. """ - domain(D.m, D.n, D.m0, D.n0, D.k, D.k0) + domain(D.m, D.n, D.k, D.m0, D.n0, D.k0) implements(ContractionOpInterface) - accum[D.m, D.n, D.m0, D.n0] += cast(TV.AccumType, lhs[D.m, D.k, D.m0, D.k0]) * cast(TV.AccumType, rhs[D.n, D.k, D.k0, D.n0]) + accum[D.m, D.n, D.m0, D.n0] += cast(TV.AccumType, lhs[D.m, D.k, D.m0, D.k0]) * cast(TV.AccumType, rhs[D.n, D.k, D.n0, D.k0]) @linalg_structured_op def batch_matmul( -- 2.7.4