From 7ad2a0e0c2bc323649507e72b112ad2060be100c Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Wed, 23 Jul 2014 13:59:07 +0000
Subject: [PATCH] ARM: add patterns for [su]xta[bh] from just a shift.

Although the final shifter operand is a rotate, this actually only matters
for the half-word extends when the amount == 24. Otherwise folding a shift
in is just as good.

llvm-svn: 213753
---
 llvm/lib/Target/ARM/ARMInstrInfo.td        |  2 ++
 llvm/lib/Target/ARM/ARMInstrThumb2.td      | 18 ++++++++++++++++++
 llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll | 20 +++++++++++++++-----
 llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 24 +++++++++++++++++++++++-
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 70c779d..b2d6a68 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -633,6 +633,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
   let ParserMatchClass = Imm32AsmOperand;
 }
 
+def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16; }]>;
+
 /// imm1_7 predicate - Immediate in the range [1,7].
 def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
 def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 85e9351..8e2324c 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
                         BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
 def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
 
+// A simple right-shift can also be used in most cases (the exception is the
+// SXTH operations with a rotate of 24: there the non-contiguous bits are
+// relevant).
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+          (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+      Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+          (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+      Requires<[HasT2ExtractPack, IsThumb2]>;
+
 // Zero extenders
 
 let AddedComplexity = 16 in {
@@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
 def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
                         BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
 def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+          (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+      Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+          (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
+      Requires<[HasT2ExtractPack, IsThumb2]>;
 }
 
+
 //===----------------------------------------------------------------------===//
 // Arithmetic Instructions.
 //
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 5e0977e..03acee2 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -2,14 +2,14 @@
 ; RUN: | FileCheck %s
 
 define i32 @test0(i8 %A) {
-; CHECK: test0
+; CHECK-LABEL: test0:
 ; CHECK: sxtb r0, r0
  %B = sext i8 %A to i32
  ret i32 %B
 }
 
 define signext i8 @test1(i32 %A) {
-; CHECK: test1
+; CHECK-LABEL: test1:
 ; CHECK: lsrs r0, r0, #8
 ; CHECK: sxtb r0, r0
  %B = lshr i32 %A, 8
@@ -20,9 +20,8 @@ define signext i8 @test1(i32 %A) {
 }
 
 define signext i32 @test2(i32 %A, i32 %X) {
-; CHECK: test2
-; CHECK: lsrs r0, r0, #8
-; CHECK: sxtab r0, r1, r0
+; CHECK-LABEL: test2:
+; CHECK: sxtab r0, r1, r0, ror #8
  %B = lshr i32 %A, 8
  %C = shl i32 %A, 24
  %D = or i32 %B, %C
@@ -31,3 +30,14 @@ define signext i32 @test2(i32 %A, i32 %X) {
  %G = add i32 %F, %X
  ret i32 %G
 }
+
+define i32 @test3(i32 %A, i32 %X) {
+; CHECK-LABEL: test3:
+; CHECK: sxtah r0, r0, r1, ror #8
+  %X.hi = lshr i32 %X, 8
+  %X.trunc = trunc i32 %X.hi to i16
+  %addend = sext i16 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 06e78d5..4afea89 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -24,7 +24,7 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
 }
 
 define zeroext i32 @test3(i32 %A.u) {
-; A8: test3
+; A8-LABEL: test3
 ; A8: ubfx r0, r0, #8, #16
  %B.u = lshr i32 %A.u, 8
  %C.u = shl i32 %A.u, 24
@@ -33,3 +33,25 @@ define zeroext i32 @test3(i32 %A.u) {
  %F.u = zext i16 %E.u to i32
  ret i32 %F.u
 }
+
+define i32 @test4(i32 %A, i32 %X) {
+; A8-LABEL: test4:
+; A8: uxtab r0, r0, r1, ror #16
+  %X.hi = lshr i32 %X, 16
+  %X.trunc = trunc i32 %X.hi to i8
+  %addend = zext i8 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
+
+define i32 @test5(i32 %A, i32 %X) {
+; A8-LABEL: test5:
+; A8: uxtah r0, r0, r1, ror #8
+  %X.hi = lshr i32 %X, 8
+  %X.trunc = trunc i32 %X.hi to i16
+  %addend = zext i16 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
-- 
2.7.4
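
Editor's note (not part of the original commit): the C sketch below illustrates the
source-level shapes the new patterns are aimed at. The function names are made up,
and the instructions in the comments are assumptions that mirror the CHECK lines in
the tests above; they would only be expected on a Thumb2 target with the
HasT2ExtractPack feature, at -O2 or similar.

    #include <stdint.h>

    /* (x >> 8) & 0xFF extracts the byte at bits [15:8]. With the new t2UXTAB
     * pattern this should fold into a single "uxtab r0, r0, r1, ror #8"
     * rather than a separate shift followed by an extend-and-add. */
    uint32_t add_byte1(uint32_t acc, uint32_t x) {
      return acc + ((x >> 8) & 0xFF);
    }

    /* Half-word case: the shift amount must be 8 or 16 (imm8_or_16). A shift
     * of 24 is excluded because "uxtah ..., ror #24" adds the non-contiguous
     * bits [31:24] and [7:0], which a plain right shift cannot express.
     * Expected here: "uxtah r0, r0, r1, ror #16". */
    uint32_t add_half_hi(uint32_t acc, uint32_t x) {
      return acc + ((x >> 16) & 0xFFFF);
    }

    /* Signed byte variant, corresponding to the t2SXTAB pattern.
     * Expected here: "sxtab r0, r0, r1, ror #8". */
    int32_t add_sbyte1(int32_t acc, uint32_t x) {
      return acc + (int8_t)(x >> 8);
    }

The same add-of-shifted-extract shapes appear as IR in the updated
thumb2-sxt_rot.ll and thumb2-uxt_rot.ll tests.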