From: Tim Northover
Date: Tue, 12 Mar 2019 15:22:23 +0000 (+0000)
Subject: CodeGenPrep: preserve inbounds attribute when sinking GEPs.
X-Git-Tag: llvmorg-10-init~10205
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8935aca9c74b0bec9e9b2a7584b64361aabc3a45;p=platform%2Fupstream%2Fllvm.git

CodeGenPrep: preserve inbounds attribute when sinking GEPs.

Targets can potentially emit more efficient code if they know address
computations never overflow. For example, ILP32 code on AArch64 (which has
only 64-bit address computation) can ignore the possibility of overflow
given this extra information.
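
To illustrate the effect (a hand-reduced variant of the sink-addrmode.ll
tests below, with hypothetical value names, not an exact dump):

    ; reduced example: %addr is sunk into %if.then by CodeGenPrepare
    define i32 @sink_example(i1 %cond, i64* %base) {
    entry:
      %addr = getelementptr inbounds i64, i64* %base, i64 5
      %casted = bitcast i64* %addr to i32*
      br i1 %cond, label %if.then, label %exit

    if.then:
      %v = load i32, i32* %casted, align 4
      ret i32 %v

    exit:
      ret i32 0
    }

When the pass rebuilds the address next to the load, it previously emitted a
plain "getelementptr i8, i8* %0, i64 40"; with this change the sunk GEP keeps
the attribute, roughly "getelementptr inbounds i8, i8* %0, i64 40", because
every component of the recomputed address comes from an inbounds GEP.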

llvm-svn: 355926
---

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 1d61d32..e70e4ee 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2073,6 +2073,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
   Value *BaseReg = nullptr;
   Value *ScaledReg = nullptr;
   Value *OriginalValue = nullptr;
+  bool InBounds = true;
 
   enum FieldName {
     NoField        = 0x00,
@@ -2084,6 +2085,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
     MultipleFields = 0xff
   };
 
+
   ExtAddrMode() = default;
 
   void print(raw_ostream &OS) const;
@@ -2102,6 +2104,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
         ScaledReg->getType() != other.ScaledReg->getType())
       return MultipleFields;
 
+    // Conservatively reject 'inbounds' mismatches.
+    if (InBounds != other.InBounds)
+      return MultipleFields;
+
     // Check each field to see if it differs.
     unsigned Result = NoField;
     if (BaseReg != other.BaseReg)
@@ -2200,6 +2206,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
 void ExtAddrMode::print(raw_ostream &OS) const {
   bool NeedPlus = false;
   OS << "[";
+  if (InBounds)
+    OS << "inbounds ";
   if (BaseGV) {
     OS << (NeedPlus ? " + " : "")
        << "GV:";
@@ -3498,6 +3506,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
   ConstantInt *CI = nullptr;
   Value *AddLHS = nullptr;
   if (isa<Instruction>(ScaleReg) && // not a constant expr.
       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.InBounds = false;
     TestAddrMode.ScaledReg = AddLHS;
     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
@@ -4072,6 +4081,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
 
+    AddrMode.InBounds = false;
     if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
         matchAddr(AddrInst->getOperand(0), Depth+1))
       return true;
@@ -4098,6 +4108,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
   case Instruction::Mul:
   case Instruction::Shl: {
     // Can only handle X*C and X << C.
+    AddrMode.InBounds = false;
     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
     if (!RHS || RHS->getBitWidth() > 64)
       return false;
@@ -4149,8 +4160,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
       if (ConstantOffset == 0 ||
           TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
         // Check to see if we can fold the base pointer in too.
-        if (matchAddr(AddrInst->getOperand(0), Depth+1))
+        if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+          if (!cast<GEPOperator>(AddrInst)->isInBounds())
+            AddrMode.InBounds = false;
           return true;
+        }
       } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
                  TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
                  ConstantOffset > 0) {
@@ -4186,6 +4200,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
 
     // See if the scale and offset amount is valid for this target.
     AddrMode.BaseOffs += ConstantOffset;
+    if (!cast<GEPOperator>(AddrInst)->isInBounds())
+      AddrMode.InBounds = false;
 
     // Match the base operand of the GEP.
     if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4871,7 +4887,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         // SDAG consecutive load/store merging.
         if (ResultPtr->getType() != I8PtrTy)
           ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-        ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+        ResultPtr =
+            AddrMode.InBounds
+                ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                            "sunkaddr")
+                : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
       }
 
       ResultIndex = V;
@@ -4882,7 +4902,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       } else {
         if (ResultPtr->getType() != I8PtrTy)
           ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
-        SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+        SunkAddr =
+            AddrMode.InBounds
+                ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+                                            "sunkaddr")
+                : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
       }
 
       if (SunkAddr->getType() != Addr->getType())
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 22c6c92..47ca605 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -492,7 +492,7 @@ done:
 %struct.foo = type { [3 x float], [3 x float] }
 
 ; OPT-LABEL: @sink_ds_address(
-; OPT: getelementptr i8,
+; OPT: getelementptr inbounds i8,
 
 ; GCN-LABEL: {{^}}sink_ds_address:
 ; GCN: s_load_dword [[SREG1:s[0-9]+]],
diff --git a/llvm/test/CodeGen/Thumb/addr-modes.ll b/llvm/test/CodeGen/Thumb/addr-modes.ll
index e6ed01d..e04d483 100644
--- a/llvm/test/CodeGen/Thumb/addr-modes.ll
+++ b/llvm/test/CodeGen/Thumb/addr-modes.ll
@@ -14,7 +14,7 @@ target triple = "thumbv6m-arm-none-eabi"
 
 ; Test case 01: %n is scaled by 4 (size of i32).
 ; Expected: GEP cannot be folded into LOAD.
-; CHECK: local addrmode: [Base:%arrayidx]
+; CHECK: local addrmode: [inbounds Base:%arrayidx]
 define i32 @load01(i32* %p, i32 %n) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n
@@ -24,7 +24,7 @@ entry:
 
 ; Test case 02: No scale of %n is needed because the size of i8 is 1.
 ; Expected: GEP can be folded into LOAD.
-; CHECK: local addrmode: [Base:%p + 1*%n]
+; CHECK: local addrmode: [inbounds Base:%p + 1*%n]
 define i8 @load02(i8* %p, i32 %n) nounwind {
 entry:
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n
diff --git a/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll b/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
index 754f8fa..d0ba90b 100644
--- a/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
@@ -54,7 +54,7 @@ cl:                                               ; preds = %sw.bb, %entry
 ; CHECK-NOT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %bw
 ; CHECK: %[[I1:[0-9]+]] = bitcast %struct.az* %[[I0]] to i8*
-; CHECK-NEXT: %sunkaddr = getelementptr i8, i8* %[[I1]], i64 8
+; CHECK-NEXT: %sunkaddr = getelementptr inbounds i8, i8* %[[I1]], i64 8
 ; CHECK-NEXT: %[[I2:[0-9]+]] = bitcast i8* %sunkaddr to %struct.bt**
 ; CHECK-NEXT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %[[I2]]
 ; CHECK-NEXT: tail call void (i8*, ...) @a
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
index cf04559..6a3804f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
@@ -219,7 +219,7 @@ define void @nophi(i32* %p) {
 ; CHECK-NEXT:    br label [[INDIRECTGOTO]]
 ; CHECK:       indirectgoto:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
-; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
 ; CHECK-NEXT:    [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
 ; CHECK-NEXT:    [[IDX:%.*]] = sext i32 [[NEWP]] to i64
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
index 5cb64f2..e914c1a 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-base.ll
@@ -41,7 +41,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %b = phi i64* [%b1, %entry], [%b2, %if.then]
   %c = phi i32* [%c1, %entry], [%c2, %if.then]
   %v = load i32, i32* %c, align 4
@@ -111,7 +111,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %b = phi i64* [%b1, %entry], [%b2, %if.then]
   %c = phi i32* [%c1, %entry], [%c2, %if.then]
   %v = load i32, i32* %c, align 4
@@ -199,7 +199,7 @@ if.then:
   br label %fallthrough
 
 fallthrough:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %c = phi i32* [%c3, %loop], [%c2, %if.then]
   %b = phi i64* [%b3, %loop], [%b2, %if.then]
   %v = load volatile i32, i32* %c, align 4
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
index ec4ad9a..4d28e06 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; Can we sink single addressing mode computation to use?
 define void @test1(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test1
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
 entry:
   %addr = getelementptr inbounds i64, i64* %base, i64 5
   %casted = bitcast i64* %addr to i32*
   br i1 %cond, label %if.then, label %fallthrough
@@ -35,7 +35,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -43,7 +43,7 @@ if.then:
 
 next:
 ; CHECK-LABEL: next:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   br label %fallthrough
@@ -63,10 +63,10 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   br label %fallthrough
@@ -86,7 +86,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -97,7 +97,7 @@ fallthrough:
 
 rare.1:
 ; CHECK-LABEL: rare.1:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v1, i32* %casted) cold
   br label %fallthrough
 }
@@ -106,14 +106,14 @@ rare.1:
 define void @test5(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test5
 entry:
-; CHECK: %addr = getelementptr
+; CHECK: %addr = getelementptr inbounds
   %addr = getelementptr inbounds i64, i64* %base, i64 5
   %casted = bitcast i64* %addr to i32*
   br i1 %cond, label %if.then, label %fallthrough
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -138,7 +138,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK-NOT: getelementptr i8, {{.+}} 40
+; CHECK-NOT: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -164,7 +164,7 @@ entry:
 
 if.then:
 ; CHECK-LABEL: if.then:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v1 = load i32, i32* %casted, align 4
   call void @foo(i32 %v1)
   %cmp = icmp eq i32 %v1, 0
@@ -172,7 +172,7 @@ if.then:
 
 next:
 ; CHECK-LABEL: next:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %v2 = load i32, i32* %casted, align 4
   call void @foo(i32 %v2)
   %cmp2 = icmp eq i32 %v2, 0
@@ -183,13 +183,13 @@ fallthrough:
 
 rare.1:
 ; CHECK-LABEL: rare.1:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v1, i32* %casted) cold
   br label %next
 
 rare.2:
 ; CHECK-LABEL: rare.2:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   call void @slowpath(i32 %v2, i32* %casted) cold
   br label %fallthrough
 }
@@ -240,7 +240,7 @@ if.then:
 
 backedge:
 ; CHECK-LABEL: backedge:
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
   %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
   %v = load i32, i32* %casted.merged, align 4
   call void @foo(i32 %v)
@@ -256,7 +256,7 @@ exit:
 ; address computation.
 define void @test10(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test10
-; CHECK: getelementptr i8, {{.+}} 40
+; CHECK: getelementptr inbounds i8, {{.+}} 40
 ; CHECK-NOT: select
 entry:
   %gep1 = getelementptr inbounds i64, i64* %base, i64 5
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
index 31f0ca2..b716ef9 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: @load_cast_gep
 ; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
-; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 40
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 40
 define void @load_cast_gep(i1 %cond, i64* %base) {
 entry:
   %addr = getelementptr inbounds i64, i64* %base, i64 5
@@ -23,7 +23,7 @@ fallthrough:
 
 ; CHECK-LABEL: @store_gep_cast
 ; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
-; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 20
+; GEP: getelementptr inbounds i8, i8 addrspace(1)* [[CAST]], i64 20
 define void @store_gep_cast(i1 %cond, i64* %base) {
 entry:
   %casted = addrspacecast i64* %base to i32 addrspace(1)*
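
A note on when the attribute must be dropped: matchScaledValue and the
Add/Mul/Shl cases of matchOperationAddr above clear AddrMode.InBounds
whenever the address is reassembled from arithmetic the original inbounds
GEP does not cover, since the intermediate values are not themselves
guaranteed in-bounds. For example (hypothetical IR, not from this patch's
test suite):

    define i32 @scaled(i32* %p, i64 %n) {
    entry:
      ; the matcher folds the +1 into the constant offset:
      ; ScaledReg = %n, Scale = 4, BaseOffs = 4
      %idx = add i64 %n, 1
      %addr = getelementptr inbounds i32, i32* %p, i64 %idx
      %v = load i32, i32* %addr, align 4
      ret i32 %v
    }

Here "%p + 4*%n" on its own is not guaranteed to stay within the original
object, so any GEP rebuilt from this decomposition is conservatively emitted
without 'inbounds'.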