From 8dace28f92056a04e02d913684d3f448ee1f295a Mon Sep 17 00:00:00 2001
From: Jakub Lichman
Date: Wed, 19 Aug 2020 15:13:19 +0000
Subject: [PATCH] [mlir][VectorToSCF] Bug in TransferRead lowering fixed

If a memref has rank > 1, this pass emits N-1 loops around the TransferRead
op and transforms the op itself into a 1-D read. Since vectors must have a
static shape while memrefs need not, the pass also emits an if condition that
prevents out-of-bounds accesses whenever a memref dimension is smaller than
the corresponding dimension of the targeted vector. That logic is sound, but
the `permutation_map` was never applied to the loop upper bounds, so whenever
the `permutation_map` is not the identity map the if condition compares the
induction variable against the wrong upper bound (the wrong memref
dimension). This commit fixes that.
---
 mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp    |  6 ++-
 .../Conversion/VectorToSCF/vector-to-loops.mlir    | 57 ++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 95208ad..0f428f8 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -16,6 +16,7 @@
 
 #include "../PassDetail.h"
 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/SCF/EDSC/Builders.h"
 #include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
@@ -203,7 +204,10 @@ Value NDTransferOpHelper<ConcreteOp>::emitInBoundsCondition(
   Value inBoundsCondition;
   majorIvsPlusOffsets.reserve(majorIvs.size());
   unsigned idx = 0;
-  for (auto it : llvm::zip(majorIvs, majorOffsets, memrefBounds.getUbs())) {
+  SmallVector<Value, 4> bounds =
+      linalg::applyMapToValues(rewriter, xferOp.getLoc(),
+                               xferOp.permutation_map(), memrefBounds.getUbs());
+  for (auto it : llvm::zip(majorIvs, majorOffsets, bounds)) {
     Value iv = std::get<0>(it), off = std::get<1>(it), ub = std::get<2>(it);
     using namespace mlir::edsc::op;
     majorIvsPlusOffsets.push_back(iv + off);
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
index 213877c..5c2da79 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir
@@ -400,3 +400,60 @@ func @transfer_read_simple(%A : memref<2x2xf32>) -> vector<2x2xf32> {
   %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<2x2xf32>, vector<2x2xf32>
   return %0 : vector<2x2xf32>
 }
+
+func @transfer_read_minor_identity(%A : memref<?x?x?x?xf32>) -> vector<3x3xf32> {
+  %c0 = constant 0 : index
+  %f0 = constant 0.0 : f32
+  %0 = vector.transfer_read %A[%c0, %c0, %c0, %c0], %f0
+    { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
+      : memref<?x?x?x?xf32>, vector<3x3xf32>
+  return %0 : vector<3x3xf32>
+}
+
+// CHECK-LABEL: transfer_read_minor_identity(
+// CHECK-SAME:    %[[A:.*]]: memref<?x?x?x?xf32>) -> vector<3x3xf32>
+// CHECK:         %[[c0:.*]] = constant 0 : index
+// CHECK:         %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK:         %[[c2:.*]] = constant 2 : index
+// CHECK:         %[[cst0:.*]] = constant dense<0.000000e+00> : vector<3xf32>
+// CHECK:         %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>>
+// CHECK:         %[[d:.*]] = dim %[[A]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECK:         affine.for %[[arg1:.*]] = 0 to 3 {
+// CHECK:           %[[cmp:.*]] = cmpi "slt", %[[arg1]], %[[d]] : index
+// CHECK:           scf.if %[[cmp]] {
+// CHECK:             %[[tr:.*]] = vector.transfer_read %[[A]][%[[c0]], %[[c0]], %[[arg1]], %[[c0]]], %[[cst]] : memref<?x?x?x?xf32>, vector<3xf32>
+// CHECK:             store %[[tr]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
+// CHECK:           } else {
+// CHECK:             store %[[cst0]], %[[m]][%[[arg1]]] : memref<3xvector<3xf32>>
+// CHECK:           }
+// CHECK:         }
+// CHECK:         %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
+// CHECK:         %[[ret:.*]] = load %[[cast]][] : memref<vector<3x3xf32>>
+// CHECK:         return %[[ret]] : vector<3x3xf32>
+
+func @transfer_write_minor_identity(%A : vector<3x3xf32>, %B : memref<?x?x?x?xf32>) {
+  %c0 = constant 0 : index
+  %f0 = constant 0.0 : f32
+  vector.transfer_write %A, %B[%c0, %c0, %c0, %c0]
+    { permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d3)> }
+      : vector<3x3xf32>, memref<?x?x?x?xf32>
+  return
+}
+
+// CHECK-LABEL: transfer_write_minor_identity(
+// CHECK-SAME:    %[[A:.*]]: vector<3x3xf32>,
+// CHECK-SAME:    %[[B:.*]]: memref<?x?x?x?xf32>)
+// CHECK:         %[[c0:.*]] = constant 0 : index
+// CHECK:         %[[c2:.*]] = constant 2 : index
+// CHECK:         %[[m:.*]] = alloca() {alignment = 128 : i64} : memref<3xvector<3xf32>>
+// CHECK:         %[[cast:.*]] = vector.type_cast %[[m]] : memref<3xvector<3xf32>> to memref<vector<3x3xf32>>
+// CHECK:         store %[[A]], %[[cast]][] : memref<vector<3x3xf32>>
+// CHECK:         %[[d:.*]] = dim %[[B]], %[[c2]] : memref<?x?x?x?xf32>
+// CHECK:         affine.for %[[arg2:.*]] = 0 to 3 {
+// CHECK:           %[[cmp:.*]] = cmpi "slt", %[[arg2]], %[[d]] : index
+// CHECK:           scf.if %[[cmp]] {
+// CHECK:             %[[tmp:.*]] = load %[[m]][%[[arg2]]] : memref<3xvector<3xf32>>
+// CHECK:             vector.transfer_write %[[tmp]], %[[B]][%[[c0]], %[[c0]], %[[arg2]], %[[c0]]] : vector<3xf32>, memref<?x?x?x?xf32>
+// CHECK:           }
+// CHECK:         }
+// CHECK:         return
-- 
2.7.4
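
Illustration only, not part of the patch: a standalone C++ sketch of what applying the permutation map to the memref upper bounds accomplishes for a pure projection map such as (d0, d1, d2, d3) -> (d2, d3). The helper name applyPermutation and the concrete runtime shape 7x8x2x3 are invented for this example; the patch itself uses linalg::applyMapToValues to build the permuted bounds as SSA values.

// Standalone sketch (plain C++, not the MLIR API): for a pure
// permutation/projection map, each map result is a single input dimension;
// results[i] names the input dimension that feeds result position i.
#include <cassert>
#include <cstdio>
#include <vector>

static std::vector<long> applyPermutation(const std::vector<unsigned> &results,
                                          const std::vector<long> &ubs) {
  std::vector<long> permuted;
  permuted.reserve(results.size());
  for (unsigned dim : results) {
    assert(dim < ubs.size() && "map result names a missing dimension");
    permuted.push_back(ubs[dim]);
  }
  return permuted;
}

int main() {
  // Hypothetical memref<?x?x?x?xf32> with runtime shape 7x8x2x3, read as a
  // vector<3x3xf32> through the map (d0, d1, d2, d3) -> (d2, d3).
  std::vector<long> ubs = {7, 8, 2, 3};
  std::vector<unsigned> mapResults = {2, 3};
  std::vector<long> bounds = applyPermutation(mapResults, ubs);
  // The in-bounds guard of the emitted loop must compare the induction
  // variable against dimension d2 (== 2), not d0 (== 7); using d0 would let
  // the 3x3 read run past the end of the third memref dimension.
  std::printf("guard upper bounds: %ld, %ld\n", bounds[0], bounds[1]);
  return 0;
}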