VectorType lhsType = op.getLhsType();
Value lhs = op.lhs(), rhs = op.rhs(), res = op.acc();
- // Set up the parallel/reduction structure in right form.
- AffineExpr m, n, k;
- bindDims(rewriter.getContext(), m, n, k);
-
//
// Two outer parallel, one inner reduction (matmat flavor).
//
UnrolledOuterProductEmitter e(rewriter, op);
if (e.iters({Par(), Par(), Red()})) {
+ // Set up the parallel/reduction structure in the right form.
+ AffineExpr m, n, k;
+ bindDims(rewriter.getContext(), m, n, k);
// Classical row-major matmul: Just permute the lhs.
if (e.layout({{m, k}, {k, n}, {m, n}}))
return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
// One outer parallel, one inner reduction (matvec flavor)
//
if (e.iters({Par(), Red()})) {
+ AffineExpr m, k;
+ bindDims(rewriter.getContext(), m, k);
+
+ // Case mat-vec: transpose.
+ if (e.layout({{m, k}, {k}, {m}}))
+ return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
+ // Case mat-trans-vec: ready to go.
+ if (e.layout({{k, m}, {k}, {m}}))
+ return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0));
+ // Case vec-mat: swap and transpose.
+ if (e.layout({{k}, {m, k}, {m}}))
+ return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0));
+ // Case vec-mat-trans: swap and ready to go.
+ if (e.layout({{k}, {k, m}, {m}}))
+ return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0));
+ return failure();
+ }
+
+ //
+ // One outer reduction, one inner parallel (tmatvec flavor)
+ //
+ if (e.iters({Red(), Par()})) {
+ AffineExpr k, m;
+ bindDims(rewriter.getContext(), k, m);
+
// Case mat-vec: transpose.
- if (e.layout({{m, n}, {n}, {m}}))
+ if (e.layout({{m, k}, {k}, {m}}))
return e.outer_prod(e.t(lhs), rhs, res, lhsType.getDimSize(1));
// Case mat-trans-vec: ready to go.
- if (e.layout({{n, m}, {n}, {m}}))
+ if (e.layout({{k, m}, {k}, {m}}))
return e.outer_prod(lhs, rhs, res, lhsType.getDimSize(0));
// Case vec-mat: swap and transpose.
- if (e.layout({{n}, {m, n}, {m}}))
+ if (e.layout({{k}, {m, k}, {m}}))
return e.outer_prod(e.t(rhs), lhs, res, lhsType.getDimSize(0));
// Case vec-mat-trans: swap and ready to go.
- if (e.layout({{n}, {n, m}, {m}}))
+ if (e.layout({{k}, {k, m}, {m}}))
return e.outer_prod(rhs, lhs, res, lhsType.getDimSize(0));
return failure();
}
iterator_types = ["parallel", "reduction"]
}
+// Indexing maps over the iteration space (i, j), listed in order: the first
+// map is (i), the second is (i, j), and the third is (j).
+#redpar_vecmattrans_accesses = [
+ affine_map<(i, j) -> (i)>,
+ affine_map<(i, j) -> (i, j)>,
+ affine_map<(i, j) -> (j)>
+]
+// Contraction trait pairing the maps above with iterator types in which the
+// outer iterator (i) is a reduction and the inner iterator (j) is parallel.
+#redpar_vecmattrans_trait = {
+ indexing_maps = #redpar_vecmattrans_accesses,
+ iterator_types = ["reduction", "parallel"]
+}
+
// CHECK-LABEL: func @matvec2x2
// CHECK-SAME: %[[A:.*0]]: memref<vector<2x2xf32>>
// CHECK-SAME: %[[B:.*1]]: memref<vector<2xf32>>
memref.store %0, %arg2[] : memref<vector<2xf32>>
return
}
+
+// Test for a vector.contract that uses #redpar_vecmattrans_trait with a
+// vector<2xf32> first operand, a vector<2x2xf32> second operand and a
+// vector<2xf32> accumulator. The expected output is two vector.outerproduct
+// ops, one per index 0 and 1: each takes the slice of the 2x2 value at that
+// leading index and the scalar of the 2-element value at the same index, and
+// the second op accumulates into the result of the first.
+// CHECK-LABEL: func @redpar_vecmattrans2x2
+// CHECK-SAME: %[[A:.*0]]: memref<vector<2x2xf32>>
+// CHECK-SAME: %[[B:.*1]]: memref<vector<2xf32>>
+// CHECK-SAME: %[[C:.*2]]: memref<vector<2xf32>>
+// CHECK: %[[T0:.*]] = memref.load %[[A]][] : memref<vector<2x2xf32>>
+// CHECK: %[[T1:.*]] = memref.load %[[B]][] : memref<vector<2xf32>>
+// CHECK: %[[T2:.*]] = memref.load %[[C]][] : memref<vector<2xf32>>
+// CHECK: %[[T3:.*]] = vector.extract %[[T0]][0] : vector<2x2xf32>
+// CHECK: %[[T4:.*]] = vector.extract %[[T1]][0] : vector<2xf32>
+// CHECK: %[[T5:.*]] = vector.outerproduct %[[T3]], %[[T4]], %[[T2]] {kind = #vector.kind<add>} : vector<2xf32>, f32
+// CHECK: %[[T6:.*]] = vector.extract %[[T0]][1] : vector<2x2xf32>
+// CHECK: %[[T7:.*]] = vector.extract %[[T1]][1] : vector<2xf32>
+// CHECK: %[[T8:.*]] = vector.outerproduct %[[T6]], %[[T7]], %[[T5]] {kind = #vector.kind<add>} : vector<2xf32>, f32
+// CHECK: memref.store %[[T8]], %[[C]][] : memref<vector<2xf32>>
+// CHECK: return
+func @redpar_vecmattrans2x2(%arg0: memref<vector<2x2xf32>>, %arg1: memref<vector<2xf32>>,
+ %arg2: memref<vector<2xf32>>) {
+ %A = memref.load %arg0[] : memref<vector<2x2xf32>>
+ %x = memref.load %arg1[] : memref<vector<2xf32>>
+ %b = memref.load %arg2[] : memref<vector<2xf32>>
+ // Operand order: the 2-element vector %x comes first, the 2x2 value %A
+ // second, and %b is the accumulator; the result is stored back to %arg2.
+ %0 = vector.contract #redpar_vecmattrans_trait %x, %A, %b : vector<2xf32>, vector<2x2xf32> into vector<2xf32>
+ memref.store %0, %arg2[] : memref<vector<2xf32>>
+ return
+}