From: Uday Bondhugula Date: Thu, 29 Aug 2019 06:42:17 +0000 (-0700) Subject: fix loop unroll and jam - operand mapping - imperfect nest case X-Git-Tag: llvmorg-11-init~1466^2~879 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bc2a543225abdd3876ae0b31fac89a3b31872d86;p=platform%2Fupstream%2Fllvm.git fix loop unroll and jam - operand mapping - imperfect nest case - fix operand mapping while cloning sub-blocks to jam - was incorrect for imperfect nests where def/use was across sub-blocks - strengthen/generalize the first test case to cover the previously missed scenario - clean up the other cases while on this. Previously, unroll-jamming the following nest ``` affine.for %arg0 = 0 to 2048 { %0 = alloc() : memref<512x10xf32> affine.for %arg1 = 0 to 10 { %1 = affine.load %0[%arg0, %arg1] : memref<512x10xf32> } dealloc %0 : memref<512x10xf32> } ``` would yield ``` affine.for %arg0 = 0 to 2048 step 2 { %0 = alloc() : memref<512x10xf32> %1 = affine.apply #map0(%arg0) %2 = alloc() : memref<512x10xf32> affine.for %arg1 = 0 to 10 { %4 = affine.load %0[%arg0, %arg1] : memref<512x10xf32> %5 = affine.apply #map0(%arg0) %6 = affine.load %0[%5, %arg1] : memref<512x10xf32> } dealloc %0 : memref<512x10xf32> %3 = affine.apply #map0(%arg0) dealloc %0 : memref<512x10xf32> } ``` instead of ``` module { affine.for %arg0 = 0 to 2048 step 2 { %0 = alloc() : memref<512x10xf32> %1 = affine.apply #map0(%arg0) %2 = alloc() : memref<512x10xf32> affine.for %arg1 = 0 to 10 { %4 = affine.load %0[%arg0, %arg1] : memref<512x10xf32> %5 = affine.apply #map0(%arg0) %6 = affine.load %2[%5, %arg1] : memref<512x10xf32> } dealloc %0 : memref<512x10xf32> %3 = affine.apply #map0(%arg0) dealloc %2 : memref<512x10xf32> } } ``` Signed-off-by: Uday Bondhugula Closes tensorflow/mlir#98 COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/98 from bondhugula:ujam ddbc853f69b5608b3e8ff9b5ac1f6a5a0bb315a4 PiperOrigin-RevId: 266073460 --- diff --git a/mlir/lib/Transforms/LoopUnrollAndJam.cpp 
b/mlir/lib/Transforms/LoopUnrollAndJam.cpp index 3e92ad7..b6b2f3d 100644 --- a/mlir/lib/Transforms/LoopUnrollAndJam.cpp +++ b/mlir/lib/Transforms/LoopUnrollAndJam.cpp @@ -209,14 +209,14 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp, forOp.setStep(step * unrollJamFactor); auto *forOpIV = forOp.getInductionVar(); - for (auto &subBlock : subBlocks) { - // Builder to insert unroll-jammed bodies. Insert right at the end of - // sub-block. - OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second)); - - // Unroll and jam (appends unrollJamFactor-1 additional copies). - for (unsigned i = 1; i < unrollJamFactor; i++) { - BlockAndValueMapping operandMapping; + // Unroll and jam (appends unrollJamFactor-1 additional copies). + for (unsigned i = 1; i < unrollJamFactor; i++) { + // Operand map persists across all sub-blocks. + BlockAndValueMapping operandMapping; + for (auto &subBlock : subBlocks) { + // Builder to insert unroll-jammed bodies. Insert right at the end of + // sub-block. + OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second)); // If the induction variable is used, create a remapping to the value for // this unrolled instance. 
diff --git a/mlir/test/Transforms/unroll-jam.mlir b/mlir/test/Transforms/unroll-jam.mlir index fb0173e..e7f88c8 100644 --- a/mlir/test/Transforms/unroll-jam.mlir +++ b/mlir/test/Transforms/unroll-jam.mlir @@ -7,48 +7,52 @@ // CHECK-LABEL: func @unroll_jam_imperfect_nest() { func @unroll_jam_imperfect_nest() { - // CHECK: %c100 = constant 100 : index - // CHECK-NEXT: affine.for %arg0 = 0 to 100 step 2 { affine.for %i = 0 to 101 { - // CHECK: "addi32"(%arg0, %arg0) : (index, index) -> i32 - // CHECK-NEXT: %3 = affine.apply [[MAP_PLUS_1]](%arg0) - // CHECK-NEXT: "addi32"(%3, %3) : (index, index) -> i32 %x = "addi32"(%i, %i) : (index, index) -> i32 affine.for %j = 0 to 17 { - // CHECK: %8 = "addi32"(%arg0, %arg0) : (index, index) -> i32 - // CHECK-NEXT: "addi32"(%8, %8) : (i32, i32) -> i32 - // CHECK-NEXT: %10 = affine.apply [[MAP_PLUS_1]](%arg0) - // CHECK-NEXT: %11 = "addi32"(%10, %10) : (index, index) -> i32 - // CHECK-NEXT: "addi32"(%11, %11) : (i32, i32) -> i32 %y = "addi32"(%i, %i) : (index, index) -> i32 %z = "addi32"(%y, %y) : (i32, i32) -> i32 } - // CHECK: "addi32"(%arg0, %arg0) : (index, index) -> i32 - // CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_1]](%arg0) - // CHECK-NEXT: "addi32"(%6, %6) : (index, index) -> i32 - %w = "addi32"(%i, %i) : (index, index) -> i32 - } // CHECK } - // cleanup loop (single iteration) - // CHECK: "addi32"(%c100, %c100) : (index, index) -> i32 - // CHECK-NEXT: affine.for %arg0 = 0 to 17 { - // CHECK-NEXT: %2 = "addi32"(%c100, %c100) : (index, index) -> i32 - // CHECK-NEXT: "addi32"(%2, %2) : (i32, i32) -> i32 - // CHECK-NEXT: } - // CHECK-NEXT: "addi32"(%c100, %c100) : (index, index) -> i32 + %w = "foo"(%i, %x) : (index, i32) -> i32 + } return } +// CHECK: affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 { +// CHECK-NEXT: [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]]) +// CHECK-NEXT: [[INC:%[0-9]+]] = affine.apply [[MAP_PLUS_1]]([[IV0]]) +// CHECK-NEXT: [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]]) +// CHECK-NEXT: affine.for %{{.*}} 
= 0 to 17 { +// CHECK-NEXT: [[RES3:%[0-9]+]] = "addi32"([[IV0]], [[IV0]]) +// CHECK-NEXT: "addi32"([[RES3]], [[RES3]]) : (i32, i32) -> i32 +// CHECK-NEXT: [[INC1:%[0-9]+]] = affine.apply [[MAP_PLUS_1]]([[IV0]]) +// CHECK-NEXT: [[RES4:%[0-9]+]] = "addi32"([[INC1]], [[INC1]]) +// CHECK-NEXT: "addi32"([[RES4]], [[RES4]]) : (i32, i32) -> i32 +// CHECK-NEXT: } +// CHECK: "foo"([[IV0]], [[RES1]]) +// CHECK-NEXT: {{.*}} = affine.apply [[MAP_PLUS_1]]([[IV0]]) +// CHECK-NEXT: "foo"({{.*}}, [[RES2]]) +// CHECK: } +// Cleanup loop (single iteration). +// CHECK: %{{.*}} = "addi32"(%c100, %c100) +// CHECK-NEXT: affine.for [[IV0]] = 0 to 17 { +// CHECK-NEXT: [[RESC:%[0-9]+]] = "addi32"(%c100, %c100) +// CHECK-NEXT: "addi32"([[RESC]], [[RESC]]) : (i32, i32) -> i32 +// CHECK-NEXT: } +// CHECK-NEXT: %{{.*}} = "foo"(%c100, %{{.*}}) +// CHECK-NEXT: return -// CHECK-LABEL: func @loop_nest_unknown_count_1(%arg0: index) { +// CHECK-LABEL: func @loop_nest_unknown_count_1 +// CHECK-SAME: [[N:arg[0-9]+]]: index func @loop_nest_unknown_count_1(%N : index) { - // CHECK-NEXT: affine.for %arg1 = 1 to [[MAP_DIV_OFFSET]]()[%arg0] step 2 { - // CHECK-NEXT: affine.for %arg2 = 1 to 100 { - // CHECK-NEXT: %0 = "foo"() : () -> i32 - // CHECK-NEXT: %1 = "foo"() : () -> i32 + // CHECK-NEXT: affine.for %{{.*}} = 1 to [[MAP_DIV_OFFSET]]()[%[[N]]] step 2 { + // CHECK-NEXT: affine.for %{{.*}} = 1 to 100 { + // CHECK-NEXT: %{{.*}} = "foo"() : () -> i32 + // CHECK-NEXT: %{{.*}} = "foo"() : () -> i32 // CHECK-NEXT: } // CHECK-NEXT: } // A cleanup loop should be generated here. 
- // CHECK-NEXT: affine.for %arg1 = [[MAP_DIV_OFFSET]]()[%arg0] to %arg0 { - // CHECK-NEXT: affine.for %arg2 = 1 to 100 { + // CHECK-NEXT: affine.for %{{.*}} = [[MAP_DIV_OFFSET]]()[%[[N]]] to %[[N]] { + // CHECK-NEXT: affine.for %{{.*}} = 1 to 100 { // CHECK-NEXT: "foo"() : () -> i32 // CHECK_NEXT: } // CHECK_NEXT: } @@ -60,21 +64,22 @@ func @loop_nest_unknown_count_1(%N : index) { return } -// CHECK-LABEL: func @loop_nest_unknown_count_2(%arg0: index) { -func @loop_nest_unknown_count_2(%arg : index) { - // CHECK-NEXT: affine.for %arg1 = %arg0 to [[M1]]()[%arg0] step 2 { - // CHECK-NEXT: affine.for %arg2 = 1 to 100 { - // CHECK-NEXT: "foo"(%arg1) : (index) -> i32 - // CHECK-NEXT: %2 = affine.apply #map{{[0-9]+}}(%arg1) - // CHECK-NEXT: "foo"(%2) : (index) -> i32 +// CHECK-LABEL: func @loop_nest_unknown_count_2 +// CHECK-SAME: %[[N:arg[0-9]+]]: index +func @loop_nest_unknown_count_2(%N : index) { + // CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = %[[N]] to [[M1]]()[%[[N]]] step 2 { + // CHECK-NEXT: affine.for [[IV1:%arg[0-9]+]] = 1 to 100 { + // CHECK-NEXT: "foo"([[IV0]]) : (index) -> i32 + // CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply #map{{[0-9]+}}([[IV0]]) + // CHECK-NEXT: "foo"([[RES]]) // CHECK-NEXT: } // CHECK-NEXT: } // The cleanup loop is a single iteration one and is promoted. 
- // CHECK-NEXT: %0 = affine.apply [[M1]]()[%arg0] - // CHECK-NEXT: affine.for %arg1 = 1 to 100 { - // CHECK-NEXT: "foo"(%0) : (index) -> i32 + // CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply [[M1]]()[%[[N]]] + // CHECK-NEXT: affine.for [[IV0]] = 1 to 100 { + // CHECK-NEXT: "foo"([[RES]]) // CHECK_NEXT: } - affine.for %i = %arg to ()[s0] -> (s0+9) ()[%arg] { + affine.for %i = %N to ()[s0] -> (s0+9) ()[%N] { affine.for %j = 1 to 100 { %x = "foo"(%i) : (index) -> i32 } @@ -83,6 +88,9 @@ func @loop_nest_unknown_count_2(%arg : index) { } // CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound +// CHECK-SAME: [[M:arg[0-9]+]]: index +// CHECK-SAME: [[N:arg[0-9]+]]: index +// CHECK-SAME: [[K:arg[0-9]+]]: index func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) { affine.for %i = 0 to min ()[s0, s1] -> (s0, s1, 1024)()[%M, %N] { affine.for %j = 0 to %K { @@ -91,16 +99,16 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) } return } -// CHECK-NEXT: affine.for %arg3 = 0 to min [[MAP_MULTI_RES]]()[%arg0, %arg1] step 2 { -// CHECK-NEXT: affine.for %arg4 = 0 to %arg2 { -// CHECK-NEXT: "foo"(%arg3, %arg4) : (index, index) -> () -// CHECK-NEXT: %0 = affine.apply #map0(%arg3) -// CHECK-NEXT: "foo"(%0, %arg4) : (index, index) -> () +// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = 0 to min [[MAP_MULTI_RES]]()[%[[M]], %[[N]]] step 2 { +// CHECK-NEXT: affine.for [[IV1:%arg[0-9]+]] = 0 to %[[K]] { +// CHECK-NEXT: "foo"([[IV0]], [[IV1]]) +// CHECK-NEXT: [[RES:%[0-9]+]] = affine.apply #map0([[IV0]]) +// CHECK-NEXT: "foo"([[RES]], [[IV1]]) // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: affine.for %arg3 = max [[MAP_MULTI_RES]]()[%arg0, %arg1] to min #map9()[%arg0, %arg1] { -// CHECK-NEXT: affine.for %arg4 = 0 to %arg2 { -// CHECK-NEXT: "foo"(%arg3, %arg4) : (index, index) -> () +// CHECK-NEXT: affine.for [[IV0]] = max [[MAP_MULTI_RES]]()[%[[M]], %[[N]]] to min #map9()[%[[M]], %[[N]]] { +// CHECK-NEXT: affine.for [[IV1]] = 0 
to %[[K]] { +// CHECK-NEXT: "foo"([[IV0]], [[IV1]]) // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return