From 5cebffc276e6fc34f754151bd0511bd59ca6f562 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski
Date: Fri, 30 Jun 2023 16:04:28 +0100
Subject: [PATCH] [mlir][Vector] Update the lowering of `vector.transfer_write` to SCF

This change updates the lowering of `vector.transfer_write` to SCF when
scalable vectors are used. Specifically, when lowering
`vector.transfer_write` to a loop of `vector.extractelement` ops, make sure
that the upper bound of the generated loop is scaled by `vector.vscale`:

```
  %10 = vector.vscale
  %11 = arith.muli %10, %c16 : index
  scf.for %arg2 = %c0 to %11 step %c1
```

For reference, this is the current version (i.e. before this change):

```
  scf.for %arg2 = %c0 to %c16 step %c1
```

Note that this is only valid for fixed-width vectors.

Differential Revision: https://reviews.llvm.org/D154226
---
 mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp    |  7 ++++-
 .../test/Conversion/VectorToSCF/vector-to-scf.mlir | 36 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 9456b89..6936613 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -1247,8 +1247,13 @@ struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
     Location loc = xferOp.getLoc();
     auto vecType = xferOp.getVectorType();
     auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    auto ub =
+    Value ub =
         rewriter.create<arith::ConstantIndexOp>(loc, vecType.getDimSize(0));
+    if (vecType.isScalable()) {
+      Value vscale =
+          rewriter.create<vector::VectorScaleOp>(loc, rewriter.getIndexType());
+      ub = rewriter.create<arith::MulIOp>(loc, ub, vscale);
+    }
     auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
     auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);

diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index dd7e525..475b8ab 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -511,3 +511,39 @@ func.func @transfer_read_with_tensor(%arg: tensor<f32>) -> vector<1xf32> {
       tensor<f32>, vector<1xf32>
     return %0: vector<1xf32>
 }
+
+// -----
+
+// CHECK-LABEL: transfer_write_scalable
+func.func @transfer_write_scalable(%arg0: memref<?xf32, strided<[], offset: ?>>, %arg1: f32) {
+  %0 = llvm.mlir.constant(0 : i32) : i32
+  %c0 = arith.constant 0 : index
+  %dim = memref.dim %arg0, %c0 : memref<?xf32, strided<[], offset: ?>>
+  %1 = llvm.intr.experimental.stepvector : vector<[16]xi32>
+  %2 = arith.index_cast %dim : index to i32
+  %3 = llvm.mlir.undef : vector<[16]xi32>
+  %4 = llvm.insertelement %2, %3[%0 : i32] : vector<[16]xi32>
+  %5 = llvm.shufflevector %4, %3 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[16]xi32>
+  %6 = arith.cmpi slt, %1, %5 : vector<[16]xi32>
+  %7 = llvm.mlir.undef : vector<[16]xf32>
+  %8 = llvm.insertelement %arg1, %7[%0 : i32] : vector<[16]xf32>
+  %9 = llvm.shufflevector %8, %7 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[16]xf32>
+  vector.transfer_write %9, %arg0[%c0], %6 {in_bounds = [true]} : vector<[16]xf32>, memref<?xf32, strided<[], offset: ?>>
+  return
+}
+
+// CHECK-SAME: %[[ARG_0:.*]]: memref<?xf32, strided<[], offset: ?>>,
+// CHECK: %[[C_0:.*]] = arith.constant 0 : index
+// CHECK: %[[C_16:.*]] = arith.constant 16 : index
+// CHECK: %[[STEP:.*]] = arith.constant 1 : index
+// CHECK: %[[MASK_VEC:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : vector<[16]xi32>
+// CHECK: %[[VSCALE:.*]] = vector.vscale
+// CHECK: %[[UB:.*]] = arith.muli %[[VSCALE]], %[[C_16]] : index
+// CHECK: scf.for %[[IDX:.*]] = %[[C_0]] to %[[UB]] step %[[STEP]] {
+// CHECK:   %[[MASK_VAL:.*]] = vector.extractelement %[[MASK_VEC]][%[[IDX]] : index] : vector<[16]xi1>
+// CHECK:   scf.if %[[MASK_VAL]] {
+// CHECK:     %[[VAL_TO_STORE:.*]] = vector.extractelement %{{.*}}[%[[IDX]] : index] : vector<[16]xf32>
+// CHECK:     memref.store %[[VAL_TO_STORE]], %[[ARG_0]][%[[IDX]]] : memref<?xf32, strided<[], offset: ?>>
+// CHECK:   } else {
+// CHECK:   }
+// CHECK: }
--
2.7.4
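
For readers piecing together the CHECK lines above, the lowered form they describe has roughly the following shape. This is a hand-written sketch, not output of the patch: the SSA names (%mem, %mask, %vec, %active, %val) and the wrapping function are illustrative placeholders, and the real conversion emits numbered values plus an empty else region on the scf.if.

```mlir
// Sketch of the scalable 1-D transfer_write lowering checked by
// @transfer_write_scalable; names are illustrative, not test output.
func.func @sketch_lowered_form(%mem: memref<?xf32, strided<[], offset: ?>>,
                               %mask: vector<[16]xi1>,
                               %vec: vector<[16]xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c16 = arith.constant 16 : index
  // The upper bound is scaled by the runtime vscale, so the loop visits
  // vscale * 16 lanes rather than a fixed 16.
  %vscale = vector.vscale
  %ub = arith.muli %vscale, %c16 : index
  scf.for %i = %c0 to %ub step %c1 {
    // Only store lanes whose mask bit is set.
    %active = vector.extractelement %mask[%i : index] : vector<[16]xi1>
    scf.if %active {
      %val = vector.extractelement %vec[%i : index] : vector<[16]xf32>
      memref.store %val, %mem[%i] : memref<?xf32, strided<[], offset: ?>>
    }
  }
  return
}
```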