From 68acdcb435ba0e4413f74d3eab16af22d4a49695 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Thu, 13 Aug 2015 10:48:22 +0000 Subject: [PATCH] [ARM] Reorganise and simplify thumb-1 load/store selection Other than PC-relative loads/stores, the patterns that match the various load/store addressing modes have the same complexity, so the order that they are matched is the order that they appear in the .td file. Rearrange the instruction definitions in ARMInstrThumb.td, and make use of AddedComplexity for PC-relative loads, so that the instruction matching order is the order that results in the simplest selection logic. This also makes register-offset load/store be selected when it should, as previously it was only selected for too-large immediate offsets. Differential Revision: http://reviews.llvm.org/D11800 llvm-svn: 244882 --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 99 +----- llvm/lib/Target/ARM/ARMInstrThumb.td | 162 ++++----- llvm/test/CodeGen/ARM/load.ll | 571 ++++++++++++++++++++++++++++++-- 3 files changed, 642 insertions(+), 190 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 541944c..b830442 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -160,11 +160,6 @@ public: // Thumb Addressing Modes: bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset, - unsigned Scale); - bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset); bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm); bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, @@ -1086,77 +1081,13 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, } bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, - SDValue &Offset, unsigned Scale) { - if (Scale == 4) { - SDValue TmpBase, TmpOffImm; - if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) - return false; // We want to select tLDRspi / tSTRspi instead. - - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() == ISD::TargetConstantPool) - return false; // We want to select tLDRpci instead. - } - - if (!CurDAG->isBaseWithConstantOffset(N)) - return false; - - // Thumb does not have [sp, r] address mode. - RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); - if ((LHSR && LHSR->getReg() == ARM::SP) || - (RHSR && RHSR->getReg() == ARM::SP)) - return false; - - // FIXME: Why do we explicitly check for a match here and then return false? - // Presumably to allow something else to match, but shouldn't this be - // documented? - int RHSC; - if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) - return false; - - Base = N.getOperand(0); - Offset = N.getOperand(1); - return true; -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 1); -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 2); -} - -bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, - SDValue &Base, - SDValue &Offset) { - return SelectThumbAddrModeRI(N, Base, Offset, 4); -} - -bool ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, SDValue &OffImm) { - if (Scale == 4) { - SDValue TmpBase, TmpOffImm; - if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) - return false; // We want to select tLDRspi / tSTRspi instead. - - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() == ISD::TargetConstantPool) - return false; // We want to select tLDRpci instead. 
- } - if (!CurDAG->isBaseWithConstantOffset(N)) { - if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + if (N.getOpcode() == ISD::ADD) { + return false; // We want to select register offset instead + } else if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1166,23 +1097,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, return true; } - RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); - if ((LHSR && LHSR->getReg() == ARM::SP) || - (RHSR && RHSR->getReg() == ARM::SP)) { - ConstantSDNode *LHS = dyn_cast(N.getOperand(0)); - ConstantSDNode *RHS = dyn_cast(N.getOperand(1)); - unsigned LHSC = LHS ? LHS->getZExtValue() : 0; - unsigned RHSC = RHS ? RHS->getZExtValue() : 0; - - // Thumb does not have [sp, #imm5] address mode for non-zero imm5. - if (LHSC != 0 || RHSC != 0) return false; - - Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; - } - // If the RHS is + imm5 * scale, fold into addr mode. int RHSC; if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { @@ -1191,9 +1105,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, return true; } - Base = N.getOperand(0); - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; + // Offset is too large, so use register offset instead. + return false; } bool diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 40414da..a9acab0 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br, // Load Store Instructions. // +// PC-relative loads need to be matched first as constant pool accesses need to +// always be PC-relative. 
We do this using AddedComplexity, as the pattern is +// simpler than the patterns of the other load instructions. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in +def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +// SP-relative loads should be matched before standard immediate-offset loads as +// it means we avoid having to move SP to another register. +let canFoldAsLoad = 1 in +def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // Loads: reg/reg and reg/imm5 let canFoldAsLoad = 1, isReMaterializable = 1 in multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; + // Immediate-offset loads should be matched before register-offset loads as + // when the offset is a constant it's simpler to first check if it fits in the + // immediate offset field then fall back to register-offset if it doesn't. def i : // reg/imm5 T1pILdStEncodeImm; + // Register-offset loads are matched last. 
+ def r : // reg/reg + T1pILdStEncode; } // Stores: reg/reg and reg/imm5 multiclass thumb_st_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc reg_opc, bits<4> imm_opc, AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; def i : // reg/imm5 T1pILdStEncodeImm; + def r : // reg/reg + T1pILdStEncode; } // A8.6.57 & A8.6.60 -defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4, +defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", UnOpFrag<(load node:$Src)>>; // A8.6.64 & A8.6.61 -defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1, +defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; // A8.6.76 & A8.6.73 -defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2, +defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; @@ -659,58 +691,36 @@ def tLDRSH : // A8.6.84 "ldrsh", "\t$Rt, $addr", [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>; -let canFoldAsLoad = 1 in -def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} -let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}> { - // A6.2 & A8.6.59 +def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, + "str", "\t$Rt, $addr", + [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, + T1LdStSP<{0,?,?}> 
{ bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; - let Inst{7-0} = addr; + let Inst{7-0} = addr; } // A8.6.194 & A8.6.192 -defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, +defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; // A8.6.197 & A8.6.195 -defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1, +defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // A8.6.207 & A8.6.205 -defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2, +defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; -def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, - "str", "\t$Rt, $addr", - [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}> { - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} - //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// @@ -1328,16 +1338,16 @@ def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), (tSUBrr tGPR:$lhs, tGPR:$rhs)>; // Bswap 16 with load/store -def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)), - (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>; def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), (tREV16 (tLDRHi t_addrmode_is2:$addr))>; -def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), - t_addrmode_rrs2:$addr), - (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>; +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)), + (tREV16 (tLDRHr t_addrmode_rr:$addr))>; def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), t_addrmode_is2:$addr), (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_rr:$addr), + (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>; // ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; @@ -1372,10 +1382,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, Requires<[IsThumb, HasV5T]>; // zextload i1 -> zextload i8 -def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), - (tLDRBr t_addrmode_rrs1:$addr)>; def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_rr:$addr), + (tLDRBr t_addrmode_rr:$addr)>; // extload from the stack -> word load from the stack, as it avoids having to // materialize the base in a separate register. 
This only works when a word @@ -1389,61 +1399,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, Requires<[IsThumb, IsThumb1Only, IsLE]>; // extload -> zextload -def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; -def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; -def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; -def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; -def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>; -def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>; // If it's impossible to use [r,r] address mode for sextload, select to // ldr{b|h} + sxt{b|h} instead. 
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tSXTB (tLDRBi t_addrmode_is1:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>, +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tSXTB (tLDRBr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tSXTH (tLDRHi t_addrmode_is2:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>, +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tSXTH (tLDRHr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>; def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>; +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>; def : T1Pat<(atomic_load_8 t_addrmode_is1:$src), (tLDRBi t_addrmode_is1:$src)>; -def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src), - (tLDRBr t_addrmode_rrs1:$src)>; +def : T1Pat<(atomic_load_8 t_addrmode_rr:$src), + (tLDRBr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_16 t_addrmode_is2:$src), (tLDRHi t_addrmode_is2:$src)>; -def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src), - (tLDRHr t_addrmode_rrs2:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_rr:$src), + (tLDRHr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_32 t_addrmode_is4:$src), (tLDRi t_addrmode_is4:$src)>; -def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src), - (tLDRr 
t_addrmode_rrs4:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_rr:$src), + (tLDRr t_addrmode_rr:$src)>; def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val), (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>; -def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val), - (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>; +def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val), (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>; -def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val), - (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val), (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>; -def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val), - (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>; // Large immediate handling. 
diff --git a/llvm/test/CodeGen/ARM/load.ll b/llvm/test/CodeGen/ARM/load.ll index 3b2d637..b8f3003 100644 --- a/llvm/test/CodeGen/ARM/load.ll +++ b/llvm/test/CodeGen/ARM/load.ll @@ -1,35 +1,564 @@ -; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T1 +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2 -define i32 @f1(i8* %p) { + +; Register offset + +; CHECK-LABEL: ldrsb_rr +; CHECK: ldrsb r0, [r0, r1] +define i32 @ldrsb_rr(i8* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, r1, lsl #1] +define i32 @ldrsh_rr(i16* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_rr +; CHECK: ldrb r0, [r0, r1] +define i32 @ldrb_rr(i8* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, r1, lsl #1] +define i32 @ldrh_rr(i16* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_rr +; CHECK-T1: lsls r1, r1, #2 +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, r1, lsl #2] +define i32 @ldr_rr(i32* %p, i32 %n) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_rr +; CHECK: strb r2, [r0, r1] +define void 
@strb_rr(i8* %p, i32 %n, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n + store i8 %conv, i8* %arrayidx, align 1 + ret void +} + +; CHECK-LABEL: strh_rr +; CHECK-T1: lsls r1, r1, #1 +; CHECK-T1: strh r2, [r0, r1] +; CHECK-T2: strh.w r2, [r0, r1, lsl #1] +define void @strh_rr(i16* %p, i32 %n, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n + store i16 %conv, i16* %arrayidx, align 2 + ret void +} + +; CHECK-LABEL: str_rr +; CHECK-T1: lsls r1, r1, #2 +; CHECK-T1: str r2, [r0, r1] +; CHECK-T2: str.w r2, [r0, r1, lsl #2] +define void @str_rr(i32* %p, i32 %n, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; Immediate offset of zero + +; CHECK-LABEL: ldrsb_ri_zero +; CHECK-T1: ldrb r0, [r0] +; CHECK-T1: sxtb r0, r0 +; CHECK-T2: ldrsb.w r0, [r0] +define i32 @ldrsb_ri_zero(i8* %p) { +entry: + %0 = load i8, i8* %p, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_zero +; CHECK-T1: ldrh r0, [r0] +; CHECK-T1: sxth r0, r0 +; CHECK-T2: ldrsh.w r0, [r0] +define i32 @ldrsh_ri_zero(i16* %p) { +entry: + %0 = load i16, i16* %p, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_zero +; CHECK: ldrb r0, [r0] +define i32 @ldrb_ri_zero(i8* %p) { +entry: + %0 = load i8, i8* %p, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_zero +; CHECK: ldrh r0, [r0] +define i32 @ldrh_ri_zero(i16* %p) { +entry: + %0 = load i16, i16* %p, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_zero +; CHECK: ldr r0, [r0] +define i32 @ldr_ri_zero(i32* %p) { +entry: + %0 = load i32, i32* %p, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_ri_zero +; CHECK: strb r1, [r0] +define void @strb_ri_zero(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + store i8 %conv, i8* %p, align 1 + ret 
void +} + +; CHECK-LABEL: strh_ri_zero +; CHECK: strh r1, [r0] +define void @strh_ri_zero(i16* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + store i16 %conv, i16* %p, align 2 + ret void +} + +; CHECK-LABEL: str_ri_zero +; CHECK: str r1, [r0] +define void @str_ri_zero(i32* %p, i32 %x) { +entry: + store i32 %x, i32* %p, align 4 + ret void +} + + +; Maximum Thumb-1 immediate offset + +; CHECK-LABEL: ldrsb_ri_t1_max +; CHECK-T1: movs r1, #31 +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #31] +define i32 @ldrsb_ri_t1_max(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t1_max +; CHECK-T1: movs r1, #62 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #62] +define i32 @ldrsh_ri_t1_max(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t1_max +; CHECK: ldrb r0, [r0, #31] +define i32 @ldrb_ri_t1_max(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t1_max +; CHECK: ldrh r0, [r0, #62] +define i32 @ldrh_ri_t1_max(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t1_max +; CHECK: ldr r0, [r0, #124] +define i32 @ldr_ri_t1_max(i32* %p) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 31 + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} + +; CHECK-LABEL: strb_ri_t1_max +; CHECK: strb r1, [r0, #31] +define void @strb_ri_t1_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 31 + store i8 %conv, i8* %arrayidx, align 1 
+ ret void +} + +; CHECK-LABEL: strh_ri_t1_max +; CHECK: strh r1, [r0, #62] +define void @strh_ri_t1_max(i16* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 31 + store i16 %conv, i16* %arrayidx, align 2 + ret void +} + +; CHECK-LABEL: str_ri_t1_max +; CHECK: str r1, [r0, #124] +define void @str_ri_t1_max(i32* %p, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 31 + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; One past maximum Thumb-1 immediate offset + +; CHECK-LABEL: ldrsb_ri_t1_too_big +; CHECK-T1: movs r1, #32 +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #32] +define i32 @ldrsb_ri_t1_too_big(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + %0 = load i8, i8* %arrayidx, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t1_too_big +; CHECK-T1: movs r1, #64 +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #64] +define i32 @ldrsh_ri_t1_too_big(i16* %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t1_too_big +; CHECK-T1: movs r1, #32 +; CHECK-T1: ldrb r0, [r0, r1] +; CHECK-T2: ldrb.w r0, [r0, #32] +define i32 @ldrb_ri_t1_too_big(i8* %p) { +entry: + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t1_too_big +; CHECK-T1: movs r1, #64 +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, #64] +define i32 @ldrh_ri_t1_too_big(i16* %p) { entry: - %tmp = load i8, i8* %p ; [#uses=1] - %tmp1 = sext i8 %tmp to i32 ; [#uses=1] - ret i32 %tmp1 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + %0 = load i16, i16* %arrayidx, align 2 + %conv = zext i16 %0 to i32 + ret i32 %conv } -define i32 @f2(i8* %p) { +; CHECK-LABEL: 
ldr_ri_t1_too_big +; CHECK-T1: movs r1, #128 +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, #128] +define i32 @ldr_ri_t1_too_big(i32* %p) { entry: - %tmp = load i8, i8* %p ; [#uses=1] - %tmp2 = zext i8 %tmp to i32 ; [#uses=1] - ret i32 %tmp2 + %arrayidx = getelementptr inbounds i32, i32* %p, i32 32 + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 } -define i32 @f3(i16* %p) { +; CHECK-LABEL: strb_ri_t1_too_big +; CHECK-T1: movs r2, #32 +; CHECK-T1: strb r1, [r0, r2] +; CHECK-T2: strb.w r1, [r0, #32] +define void @strb_ri_t1_too_big(i8* %p, i32 %x) { entry: - %tmp = load i16, i16* %p ; [#uses=1] - %tmp3 = sext i16 %tmp to i32 ; [#uses=1] - ret i32 %tmp3 + %conv = trunc i32 %x to i8 + %arrayidx = getelementptr inbounds i8, i8* %p, i32 32 + store i8 %conv, i8* %arrayidx, align 1 + ret void } -define i32 @f4(i16* %p) { +; CHECK-LABEL: strh_ri_t1_too_big +; CHECK-T1: movs r2, #64 +; CHECK-T1: strh r1, [r0, r2] +; CHECK-T2: strh.w r1, [r0, #64] +define void @strh_ri_t1_too_big(i16* %p, i32 %x) { entry: - %tmp = load i16, i16* %p ; [#uses=1] - %tmp4 = zext i16 %tmp to i32 ; [#uses=1] - ret i32 %tmp4 + %conv = trunc i32 %x to i16 + %arrayidx = getelementptr inbounds i16, i16* %p, i32 32 + store i16 %conv, i16* %arrayidx, align 2 + ret void } -; CHECK: ldrsb -; CHECK: ldrb -; CHECK: ldrsh -; CHECK: ldrh +; CHECK-LABEL: str_ri_t1_too_big +; CHECK-T1: movs r2, #128 +; CHECK-T1: str r1, [r0, r2] +; CHECK-T2: str.w r1, [r0, #128] +define void @str_ri_t1_too_big(i32* %p, i32 %x) { +entry: + %arrayidx = getelementptr inbounds i32, i32* %p, i32 32 + store i32 %x, i32* %arrayidx, align 4 + ret void +} + + +; Maximum Thumb-2 immediate offset + +; CHECK-LABEL: ldrsb_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrsb r0, [r0, r1] +; CHECK-T2: ldrsb.w r0, [r0, #4095] +define i32 @ldrsb_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = load i8, i8* %add.ptr, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; 
CHECK-LABEL: ldrsh_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrsh r0, [r0, r1] +; CHECK-T2: ldrsh.w r0, [r0, #4095] +define i32 @ldrsh_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = sext i16 %1 to i32 + ret i32 %conv +} +; CHECK-LABEL: ldrb_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrb r0, [r0, r1] +; CHECK-T2: ldrb.w r0, [r0, #4095] +define i32 @ldrb_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = load i8, i8* %add.ptr, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldrh r0, [r0, r1] +; CHECK-T2: ldrh.w r0, [r0, #4095] +define i32 @ldrh_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = zext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t2_max +; CHECK-T1: ldr r1, .LCP +; CHECK-T1: ldr r0, [r0, r1] +; CHECK-T2: ldr.w r0, [r0, #4095] +define i32 @ldr_ri_t2_max(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +; CHECK-LABEL: strb_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: strb r1, [r0, r2] +; CHECK-T2: strb.w r1, [r0, #4095] +define void @strb_ri_t2_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: strh r1, [r0, r2] +; CHECK-T2: strh.w r1, [r0, #4095] +define void @strh_ri_t2_max(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i16* + store i16 %conv, i16* %0, align 2 + ret void +} + +; 
CHECK-LABEL: str_ri_t2_max +; CHECK-T1: ldr r2, .LCP +; CHECK-T1: str r1, [r0, r2] +; CHECK-T2: str.w r1, [r0, #4095] +define void @str_ri_t2_max(i8* %p, i32 %x) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095 + %0 = bitcast i8* %add.ptr to i32* + store i32 %x, i32* %0, align 4 + ret void +} + + +; One past maximum Thumb-2 immediate offset + +; CHECK-LABEL: ldrsb_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrsb r0, [r0, r1] +define i32 @ldrsb_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = load i8, i8* %add.ptr, align 1 + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrsh_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrsh r0, [r0, r1] +define i32 @ldrsh_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = sext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrb_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrb r0, [r0, r1] +define i32 @ldrb_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = load i8, i8* %add.ptr, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldrh_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldrh r0, [r0, r1] +define i32 @ldrh_ri_t2_too_big(i8* %p) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + %1 = load i16, i16* %0, align 2 + %conv = zext i16 %1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: ldr_ri_t2_too_big +; CHECK-T1: movs r1, #1 +; CHECK-T1: lsls r1, r1, #12 +; CHECK-T2: mov.w r1, #4096 +; CHECK: ldr r0, [r0, r1] +define i32 @ldr_ri_t2_too_big(i8* %p) { +entry: + 
%add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +; CHECK-LABEL: strb_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: strb r1, [r0, r2] +define void @strb_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i8 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +; CHECK-LABEL: strh_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: strh r1, [r0, r2] +define void @strh_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %conv = trunc i32 %x to i16 + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i16* + store i16 %conv, i16* %0, align 2 + ret void +} + +; CHECK-LABEL: str_ri_t2_too_big +; CHECK-T1: movs r2, #1 +; CHECK-T1: lsls r2, r2, #12 +; CHECK-T2: mov.w r2, #4096 +; CHECK: str r1, [r0, r2] +define void @str_ri_t2_too_big(i8* %p, i32 %x) { +entry: + %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096 + %0 = bitcast i8* %add.ptr to i32* + store i32 %x, i32* %0, align 4 + ret void +} -- 2.7.4