From 6d05f3f56e22ab9420c71062e2a0c8923aa2a2d7 Mon Sep 17 00:00:00 2001
From: Ben Shi <2283975856@qq.com>
Date: Mon, 19 Jun 2023 17:14:41 +0800
Subject: [PATCH] [CSKY] Optimize multiplication with immediates

Try to break a multiplication by a specific immediate into an
addition/subtraction of left shifts.

Reviewed By: zixuan-wu

Differential Revision: https://reviews.llvm.org/D153106
---
 llvm/lib/Target/CSKY/CSKYISelLowering.cpp |  29 +++++
 llvm/lib/Target/CSKY/CSKYISelLowering.h   |   3 +
 llvm/lib/Target/CSKY/CSKYSubtarget.h      |   2 +
 llvm/test/CodeGen/CSKY/base-i.ll          |  62 +++++------
 llvm/test/CodeGen/CSKY/mul-imm.ll         | 169 +++++++++---------------------
 5 files changed, 106 insertions(+), 159 deletions(-)

diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index 4a2df7c..5d21aab 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -1376,3 +1376,32 @@ SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
 
   return V;
 }
+
+/// Return true if a multiplication of type \p VT by the constant \p C should
+/// be broken into a left shift combined with an addition/subtraction. Only
+/// scalar integer types no wider than XLen and ConstantSDNode operands are
+/// considered; for everything else this returns false.
+bool CSKYTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                                SDValue C) const {
+  if (!VT.isScalarInteger())
+    return false;
+
+  // Omit if the data size exceeds XLen.
+  if (VT.getSizeInBits() > Subtarget.XLen)
+    return false;
+
+  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+    const APInt &Imm = ConstNode->getAPIntValue();
+    // Break MULT to LSLI + ADDU/SUBU when Imm is 2^N - 1, 2^N + 1 or 1 - 2^N.
+    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
+        (1 - Imm).isPowerOf2())
+      return true;
+    // Imm == -(2^N + 1) costs one more instruction than the above 3 cases, so
+    // only break MULT for sub targets without MULT32. We leave it unchanged
+    // on sub targets with MULT32, since it is not clear that it is better.
+    if (!Subtarget.hasE2() && (-1 - Imm).isPowerOf2())
+      return true;
+  }
+
+  return false;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index d0abc7e..c724882 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -173,6 +173,9 @@ private:
 
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
+
+  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                              SDValue C) const override;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h
index 9e7ad00..b8be347 100644
--- a/llvm/lib/Target/CSKY/CSKYSubtarget.h
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h
@@ -206,6 +206,8 @@ public:
   bool isCK810V() const { return CSKYProcFamily == CK810V; }
   bool isCK860() const { return CSKYProcFamily == CK860; }
   bool isCK860V() const { return CSKYProcFamily == CK860V; }
+
+  const unsigned XLen = 32;
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/CSKY/base-i.ll b/llvm/test/CodeGen/CSKY/base-i.ll
index 132a39f..6e612cb 100644
--- a/llvm/test/CodeGen/CSKY/base-i.ll
+++ b/llvm/test/CodeGen/CSKY/base-i.ll
@@ -480,8 +480,8 @@ entry:
 define i32 @mulRI_X(i32 %x) {
 ; CHECK-LABEL: mulRI_X:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movi32 a1, 4097
-; CHECK-NEXT:    mult16 a0, a1
+; CHECK-NEXT:    lsli16 a1, a0, 12
+; CHECK-NEXT:    addu16 a0, a1
 ; CHECK-NEXT:    rts16
 ;
 ; GENERIC-LABEL: mulRI_X:
@@ -489,16 +489,8 @@ define i32 @mulRI_X(i32 %x) {
 ; GENERIC-NEXT:    .cfi_def_cfa_offset 0
 ; GENERIC-NEXT:    subi16 sp, sp, 4
 ; GENERIC-NEXT:    .cfi_def_cfa_offset 4
-; GENERIC-NEXT:    movi16 a1, 0
-; GENERIC-NEXT:    lsli16 a2, a1, 24
-; GENERIC-NEXT:    lsli16 a1, a1, 16
-; GENERIC-NEXT:    or16 a1, a2
-; GENERIC-NEXT:    movi16 a2, 16
-; GENERIC-NEXT:    lsli16 a2, a2, 8
-; GENERIC-NEXT:    or16 a2, a1
-; GENERIC-NEXT:    movi16 a1, 1
-; GENERIC-NEXT:    or16 a1, a2
-; GENERIC-NEXT:    mult16 a0, a1
+; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: addu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -528,8 +520,7 @@ entry: define i16 @MUL_SHORT_I(i16 %x) { ; CHECK-LABEL: MUL_SHORT_I: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi16 a1, 3 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: ixh32 a0, a0, a0 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: MUL_SHORT_I: @@ -537,8 +528,8 @@ define i16 @MUL_SHORT_I(i16 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 3 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 1 +; GENERIC-NEXT: addu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -568,9 +559,9 @@ entry: define i8 @MUL_CHAR_I(i8 %x) { ; CHECK-LABEL: MUL_CHAR_I: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movih32 a1, 65535 -; CHECK-NEXT: ori32 a1, a1, 65533 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: ixh32 a0, a0, a0 +; CHECK-NEXT: movi16 a1, 0 +; CHECK-NEXT: subu16 a0, a1, a0 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: MUL_CHAR_I: @@ -578,15 +569,10 @@ define i8 @MUL_CHAR_I(i8 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a3, a1, 16 -; GENERIC-NEXT: or16 a3, a2 -; GENERIC-NEXT: lsli16 a1, a1, 8 -; GENERIC-NEXT: or16 a1, a3 -; GENERIC-NEXT: movi16 a2, 253 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: mult16 a0, a2 +; GENERIC-NEXT: lsli16 a1, a0, 1 +; GENERIC-NEXT: addu16 a0, a1, a0 +; GENERIC-NEXT: movi16 a1, 0 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -619,7 +605,7 @@ define i32 @udivRR(i32 %x, i32 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI25_0: ; GENERIC-NEXT: .long __udivsi3 entry: 
@@ -651,7 +637,7 @@ define i32 @udivRI(i32 %x) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI26_0: ; GENERIC-NEXT: .long __udivsi3 entry: @@ -691,7 +677,7 @@ define i32 @udivRI_X(i32 %x) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI27_0: ; GENERIC-NEXT: .long __udivsi3 entry: @@ -738,7 +724,7 @@ define i16 @UDIV_SHORT(i16 %x, i16 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI28_0: ; GENERIC-NEXT: .long __udivsi3 entry: @@ -812,7 +798,7 @@ define i8 @UDIV_CHAR(i8 %x, i8 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI30_0: ; GENERIC-NEXT: .long __udivsi3 entry: @@ -871,7 +857,7 @@ define i32 @sdivRR(i32 %x, i32 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI32_0: ; GENERIC-NEXT: .long __divsi3 entry: @@ -903,7 +889,7 @@ define i32 @sdivRI(i32 %x) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI33_0: ; GENERIC-NEXT: .long __divsi3 entry: @@ -943,7 +929,7 @@ define i32 @sdivRI_X(i32 %x) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI34_0: ; GENERIC-NEXT: .long __divsi3 entry: @@ -978,7 +964,7 @@ define i16 @SDIV_SHORT(i16 %x, i16 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: 
.LCPI35_0: ; GENERIC-NEXT: .long __divsi3 entry: @@ -1050,7 +1036,7 @@ define i8 @SDIV_CHAR(i8 %x, i8 %y) { ; GENERIC-NEXT: rts16 ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: -; GENERIC-NEXT: .p2align 2 +; GENERIC-NEXT: .p2align 2, 0x0 ; GENERIC-NEXT: .LCPI37_0: ; GENERIC-NEXT: .long __divsi3 entry: diff --git a/llvm/test/CodeGen/CSKY/mul-imm.ll b/llvm/test/CodeGen/CSKY/mul-imm.ll index c87e527..f9491e4 100644 --- a/llvm/test/CodeGen/CSKY/mul-imm.ll +++ b/llvm/test/CodeGen/CSKY/mul-imm.ll @@ -8,8 +8,8 @@ define i32 @mul_i32_4097(i32 %x) { ; CHECK-LABEL: mul_i32_4097: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi32 a1, 4097 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: addu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i32_4097: @@ -17,16 +17,8 @@ define i32 @mul_i32_4097(i32 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 0 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: movi16 a2, 16 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 1 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: addu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -37,8 +29,8 @@ entry: define i32 @mul_i32_4095(i32 %x) { ; CHECK-LABEL: mul_i32_4095: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi32 a1, 4095 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: subu16 a0, a1, a0 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i32_4095: @@ -46,16 +38,8 @@ define i32 @mul_i32_4095(i32 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 0 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: 
movi16 a2, 15 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -66,9 +50,8 @@ entry: define i32 @mul_i32_minus_4095(i32 %x) { ; CHECK-LABEL: mul_i32_minus_4095: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movih32 a1, 65535 -; CHECK-NEXT: ori32 a1, a1, 61441 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: subu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i32_minus_4095: @@ -76,16 +59,8 @@ define i32 @mul_i32_minus_4095(i32 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: movi16 a2, 240 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 1 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: subu16 a0, a0, a1 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -96,8 +71,8 @@ entry: define i16 @mul_i16_4097(i16 %x) { ; CHECK-LABEL: mul_i16_4097: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi32 a1, 4097 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: addu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i16_4097: @@ -105,16 +80,8 @@ define i16 @mul_i16_4097(i16 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 0 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: movi16 a2, 16 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 1 -; GENERIC-NEXT: or16 a1, 
a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: addu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -125,8 +92,8 @@ entry: define i16 @mul_i16_4095(i16 %x) { ; CHECK-LABEL: mul_i16_4095: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi32 a1, 4095 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: subu16 a0, a1, a0 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i16_4095: @@ -134,16 +101,8 @@ define i16 @mul_i16_4095(i16 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 0 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: movi16 a2, 15 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -154,9 +113,8 @@ entry: define i16 @mul_i16_minus_4095(i16 %x) { ; CHECK-LABEL: mul_i16_minus_4095: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movih32 a1, 65535 -; CHECK-NEXT: ori32 a1, a1, 61441 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 12 +; CHECK-NEXT: subu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i16_minus_4095: @@ -164,16 +122,8 @@ define i16 @mul_i16_minus_4095(i16 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a1, a1, 16 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: movi16 a2, 240 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: movi16 a1, 1 -; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: subu16 a0, a0, a1 ; GENERIC-NEXT: 
addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -184,8 +134,8 @@ entry: define i8 @mul_i8_65(i8 %x) { ; CHECK-LABEL: mul_i8_65: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi16 a1, 65 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 6 +; CHECK-NEXT: addu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i8_65: @@ -193,8 +143,8 @@ define i8 @mul_i8_65(i8 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 65 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 6 +; GENERIC-NEXT: addu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -205,8 +155,8 @@ entry: define i8 @mul_i8_63(i8 %x) { ; CHECK-LABEL: mul_i8_63: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movi16 a1, 63 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 6 +; CHECK-NEXT: subu16 a0, a1, a0 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i8_63: @@ -214,8 +164,8 @@ define i8 @mul_i8_63(i8 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 63 -; GENERIC-NEXT: mult16 a0, a1 +; GENERIC-NEXT: lsli16 a1, a0, 6 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -226,9 +176,8 @@ entry: define i8 @mul_i8_minus_63(i8 %x) { ; CHECK-LABEL: mul_i8_minus_63: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movih32 a1, 65535 -; CHECK-NEXT: ori32 a1, a1, 65473 -; CHECK-NEXT: mult16 a0, a1 +; CHECK-NEXT: lsli16 a1, a0, 6 +; CHECK-NEXT: subu16 a0, a1 ; CHECK-NEXT: rts16 ; ; GENERIC-LABEL: mul_i8_minus_63: @@ -236,15 +185,8 @@ define i8 @mul_i8_minus_63(i8 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a3, a1, 16 -; GENERIC-NEXT: or16 a3, a2 -; GENERIC-NEXT: lsli16 a1, a1, 8 -; GENERIC-NEXT: 
or16 a1, a3 -; GENERIC-NEXT: movi16 a2, 193 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: mult16 a0, a2 +; GENERIC-NEXT: lsli16 a1, a0, 6 +; GENERIC-NEXT: subu16 a0, a0, a1 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -265,15 +207,10 @@ define i32 @mul_i32_minus_4097(i32 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a3, a1, 16 -; GENERIC-NEXT: or16 a3, a2 -; GENERIC-NEXT: movi16 a2, 239 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a3 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: mult16 a0, a2 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: addu16 a0, a1, a0 +; GENERIC-NEXT: movi16 a1, 0 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -294,15 +231,10 @@ define i16 @mul_i16_minus_4097(i16 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a3, a1, 16 -; GENERIC-NEXT: or16 a3, a2 -; GENERIC-NEXT: movi16 a2, 239 -; GENERIC-NEXT: lsli16 a2, a2, 8 -; GENERIC-NEXT: or16 a2, a3 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: mult16 a0, a2 +; GENERIC-NEXT: lsli16 a1, a0, 12 +; GENERIC-NEXT: addu16 a0, a1, a0 +; GENERIC-NEXT: movi16 a1, 0 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: @@ -323,15 +255,10 @@ define i8 @mul_i8_minus_65(i8 %x) { ; GENERIC-NEXT: .cfi_def_cfa_offset 0 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 4 -; GENERIC-NEXT: movi16 a1, 255 -; GENERIC-NEXT: lsli16 a2, a1, 24 -; GENERIC-NEXT: lsli16 a3, a1, 16 -; GENERIC-NEXT: or16 a3, a2 -; GENERIC-NEXT: lsli16 a1, a1, 8 -; GENERIC-NEXT: or16 a1, a3 -; GENERIC-NEXT: movi16 a2, 191 -; GENERIC-NEXT: or16 a2, a1 -; GENERIC-NEXT: mult16 a0, a2 
+; GENERIC-NEXT: lsli16 a1, a0, 6 +; GENERIC-NEXT: addu16 a0, a1, a0 +; GENERIC-NEXT: movi16 a1, 0 +; GENERIC-NEXT: subu16 a0, a1, a0 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: rts16 entry: -- 2.7.4