From 4a549be9c3677c75c3ad2c8881a46fb59e245823 Mon Sep 17 00:00:00 2001 From: zhongyunde Date: Sat, 1 Oct 2022 15:36:46 +0800 Subject: [PATCH] [AArch64] Lower multiplication by a negative constant to shl+sub+shl Change the cost model to lower a = b * C, where C = -(2^n - 2^m) with n > m, to the two-instruction sequence: lsl w8, w0, m; sub w0, w8, w0, lsl n Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D134934 --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++++-- llvm/test/CodeGen/AArch64/mul_pow2.ll | 12 ++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d152c6..53df680 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14833,8 +14833,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, // More aggressively, some multiplications N0 * C can be lowered to // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M, // e.g. 6=3*2=(2+1)*2. - // TODO: consider lowering more cases, e.g. C = -6, -14 or even 45 - // which equals to (1+2)*16-(1+2). + // TODO: lower more cases, e.g. C = 45 which equals to (1+2)*16-(1+2). // TrailingZeroes is used to test if the mul can be lowered to // shift+add+shift.
@@ -14891,6 +14890,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, } else { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N)) + APInt SCVPlus1 = -ShiftedConstValue + 1; APInt CVNegPlus1 = -ConstValue + 1; APInt CVNegMinus1 = -ConstValue - 1; if (CVNegPlus1.isPowerOf2()) { @@ -14899,6 +14900,9 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, } else if (CVNegMinus1.isPowerOf2()) { ShiftAmt = CVNegMinus1.logBase2(); return Negate(Add(Shl(N0, ShiftAmt), N0)); + } else if (SCVPlus1.isPowerOf2()) { + ShiftAmt = SCVPlus1.logBase2() + TrailingZeroes; + return Sub(Shl(N0, TrailingZeroes), Shl(N0, ShiftAmt)); } } diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll index fa756f8..30c639a 100644 --- a/llvm/test/CodeGen/AArch64/mul_pow2.ll +++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -524,8 +524,8 @@ define i32 @ntest5(i32 %x) { define i32 @ntest6(i32 %x) { ; CHECK-LABEL: ntest6: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-6 -; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: sub w0, w8, w0, lsl #3 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest6: @@ -623,8 +623,8 @@ define i32 @ntest11(i32 %x) { define i32 @ntest12(i32 %x) { ; CHECK-LABEL: ntest12: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-12 -; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: sub w0, w8, w0, lsl #4 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest12: @@ -656,8 +656,8 @@ define i32 @ntest13(i32 %x) { define i32 @ntest14(i32 %x) { ; CHECK-LABEL: ntest14: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-14 -; CHECK-NEXT: mul w0, w0, w8 +; CHECK-NEXT: lsl w8, w0, #1 +; CHECK-NEXT: sub w0, w8, w0, lsl #4 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest14: -- 2.7.4