From: Mats Petersson Date: Thu, 1 Jun 2023 10:39:26 +0000 (+0100) Subject: [FLANG] Change loop versioning to use shift instead of divide X-Git-Tag: upstream/17.0.6~6445 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b812932b3582752eb45240044be8ac1a9df4759f;p=platform%2Fupstream%2Fllvm.git [FLANG] Change loop versioning to use shift instead of divide Despite me being convinced that the use of divide didn't produce any divide instructions, it does in fact add more instructions than using a plain shift operation. This patch simply changes the divide to a shift right, with an assert to check that the "divisor" is a power of two. Reviewed By: kiranchandramohan, tblah Differential Revision: https://reviews.llvm.org/D151880 --- diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp index f1588d2..92430d4 100644 --- a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp +++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp @@ -262,15 +262,19 @@ void LoopVersioningPass::runOnOperation() { loc, curIndex, totalIndex) : curIndex; } - mlir::Value elemSize = - builder.createIntegerConstant(loc, idxTy, arg.size); // This is the lowest dimension - which doesn't need scaling mlir::Value finalIndex = builder.createConvert(loc, idxTy, coop->getOperand(1)); if (totalIndex) { + assert(llvm::isPowerOf2_32(arg.size) && + "Expected power of two here"); + unsigned bits = llvm::Log2_32(arg.size); + mlir::Value elemShift = + builder.createIntegerConstant(loc, idxTy, bits); totalIndex = builder.create( loc, - builder.create(loc, totalIndex, elemSize), + builder.create(loc, totalIndex, + elemShift), finalIndex); } else { totalIndex = finalIndex; diff --git a/flang/test/Transforms/loop-versioning.fir b/flang/test/Transforms/loop-versioning.fir index 3c8930c..6fc8eb8 100644 --- a/flang/test/Transforms/loop-versioning.fir +++ b/flang/test/Transforms/loop-versioning.fir @@ -366,9 +366,9 @@ func.func @sum1dfixed(%arg0: !fir.ref> {fir.bindc_name = "a"}, // Check the 2D -> 1D coordinate conversion, should have a multiply and a final add. // Some other operations are checked to synch the different parts. // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}} -// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}} -// CHECK: %[[OUTER_DIV:.*]] = arith.divsi %[[OUTER_IDX]], %[[ITEMSIZE]] +// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index +// CHECK: %[[OUTER_DIV:.*]] = arith.shrsi %[[OUTER_IDX]], %[[ITEMSHIFT]] // CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]] // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref>, index) -> !fir.ref // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref @@ -498,9 +498,9 @@ func.func @sum1dfixed(%arg0: !fir.ref> {fir.bindc_name = "a"}, // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}} // CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}} // CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]] -// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}} -// CHECK: %[[MIDDLE_DIV:.*]] = arith.divsi %[[MIDDLE_SUM]], %[[ITEMSIZE]] +// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index +// CHECK: %[[MIDDLE_DIV:.*]] = arith.shrsi %[[MIDDLE_SUM]], %[[ITEMSHIFT]] // CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]] // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref>, index) -> !fir.ref // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref