From: Mats Petersson <mats.petersson@arm.com>
Date: Thu, 1 Jun 2023 10:39:26 +0000 (+0100)
Subject: [FLANG] Change loop versioning to use shift instead of divide
X-Git-Tag: upstream/17.0.6~6445
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b812932b3582752eb45240044be8ac1a9df4759f;p=platform%2Fupstream%2Fllvm.git

[FLANG] Change loop versioning to use shift instead of divide

Despite me being convinced that the use of divide didn't produce any
divide instructions, it does in fact add more instructions than using
a plain shift operation.

This patch simply changes the divide to a shift right, with an
assert to check that the "divisor" is a power of two.

Reviewed By: kiranchandramohan, tblah

Differential Revision: https://reviews.llvm.org/D151880
---

diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
index f1588d2..92430d4 100644
--- a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -262,15 +262,19 @@ void LoopVersioningPass::runOnOperation() {
                                             loc, curIndex, totalIndex)
                                       : curIndex;
           }
-          mlir::Value elemSize =
-              builder.createIntegerConstant(loc, idxTy, arg.size);
           // This is the lowest dimension - which doesn't need scaling
           mlir::Value finalIndex =
               builder.createConvert(loc, idxTy, coop->getOperand(1));
           if (totalIndex) {
+            assert(llvm::isPowerOf2_32(arg.size) &&
+                   "Expected power of two here");
+            unsigned bits = llvm::Log2_32(arg.size);
+            mlir::Value elemShift =
+                builder.createIntegerConstant(loc, idxTy, bits);
             totalIndex = builder.create<mlir::arith::AddIOp>(
                 loc,
-                builder.create<mlir::arith::DivSIOp>(loc, totalIndex, elemSize),
+                builder.create<mlir::arith::ShRSIOp>(loc, totalIndex,
+                                                     elemShift),
                 finalIndex);
           } else {
             totalIndex = finalIndex;
diff --git a/flang/test/Transforms/loop-versioning.fir b/flang/test/Transforms/loop-versioning.fir
index 3c8930c..6fc8eb8 100644
--- a/flang/test/Transforms/loop-versioning.fir
+++ b/flang/test/Transforms/loop-versioning.fir
@@ -366,9 +366,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
 // Check the 2D -> 1D coordinate conversion, should have a multiply and a final add.
 // Some other operations are checked to synch the different parts.
 // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
-// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
 // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
-// CHECK: %[[OUTER_DIV:.*]] = arith.divsi %[[OUTER_IDX]], %[[ITEMSIZE]]
+// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
+// CHECK: %[[OUTER_DIV:.*]] = arith.shrsi %[[OUTER_IDX]], %[[ITEMSHIFT]]
 // CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]]
 // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
@@ -498,9 +498,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
 // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}}
 // CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
 // CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]]
-// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
 // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
-// CHECK: %[[MIDDLE_DIV:.*]] = arith.divsi %[[MIDDLE_SUM]], %[[ITEMSIZE]]
+// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
+// CHECK: %[[MIDDLE_DIV:.*]] = arith.shrsi %[[MIDDLE_SUM]], %[[ITEMSHIFT]]
 // CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]]
 // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>