From 4a549be9c3677c75c3ad2c8881a46fb59e245823 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Sat, 1 Oct 2022 15:36:46 +0800
Subject: [PATCH] [AArch64] Lower multiplication by a negative constant to
 shl+sub+shl

Change the cost model to lower a = b * C, where C = -(2^n - 2^m), to a
shift followed by a subtract with a shifted operand, since
b * C = (b << m) - (b << n):
            lsl     w8, w0, m
            sub     w0, w8, w0, lsl n
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D134934
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |  8 ++++++--
 llvm/test/CodeGen/AArch64/mul_pow2.ll           | 12 ++++++------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0d152c6..53df680 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14833,8 +14833,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
   // More aggressively, some multiplications N0 * C can be lowered to
   // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
   // e.g. 6=3*2=(2+1)*2.
-  // TODO: consider lowering more cases, e.g. C = -6, -14 or even 45
-  // which equals to (1+2)*16-(1+2).
+  // TODO: lower more cases, e.g. C = 45 which equals to (1+2)*16-(1+2).
 
   // TrailingZeroes is used to test if the mul can be lowered to
   // shift+add+shift.
@@ -14891,6 +14890,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
   } else {
     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
     // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+    // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N))
+    APInt SCVPlus1 = -ShiftedConstValue + 1;
     APInt CVNegPlus1 = -ConstValue + 1;
     APInt CVNegMinus1 = -ConstValue - 1;
     if (CVNegPlus1.isPowerOf2()) {
@@ -14899,6 +14900,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
     } else if (CVNegMinus1.isPowerOf2()) {
       ShiftAmt = CVNegMinus1.logBase2();
       return Negate(Add(Shl(N0, ShiftAmt), N0));
+    } else if (SCVPlus1.isPowerOf2()) {
+      ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes;
+      return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt));
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index fa756f8..30c639a 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -524,8 +524,8 @@ define i32 @ntest5(i32 %x) {
 define i32 @ntest6(i32 %x) {
 ; CHECK-LABEL: ntest6:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-6
-; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    lsl w8, w0, #1
+; CHECK-NEXT:    sub w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: ntest6:
@@ -623,8 +623,8 @@ define i32 @ntest11(i32 %x) {
 define i32 @ntest12(i32 %x) {
 ; CHECK-LABEL: ntest12:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-12
-; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    lsl w8, w0, #2
+; CHECK-NEXT:    sub w0, w8, w0, lsl #4
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: ntest12:
@@ -656,8 +656,8 @@ define i32 @ntest13(i32 %x) {
 define i32 @ntest14(i32 %x) {
 ; CHECK-LABEL: ntest14:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-14
-; CHECK-NEXT:    mul w0, w0, w8
+; CHECK-NEXT:    lsl w8, w0, #1
+; CHECK-NEXT:    sub w0, w8, w0, lsl #4
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: ntest14:
-- 
2.7.4