From 6e7eeb44b305391f736437d050729b09c02fda0f Mon Sep 17 00:00:00 2001
From: Huihui Zhang
Date: Fri, 10 Apr 2020 17:48:56 -0700
Subject: [PATCH] [GVN] Fix VNCoercion for Scalable Vector.

Summary:
For VNCoercion, skip scalable vectors when the analysis relies on a fixed
size; otherwise call TypeSize::getFixedSize() explicitly.

Add unit tests to check the functionality of GVN load elimination for
scalable types.

A short illustrative sketch of this guard pattern follows the patch below.

Reviewers: sdesmalen, efriedma, spatel, fhahn, reames, apazos, ctetreau

Reviewed By: efriedma

Subscribers: bjope, hiraditya, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76944
---
 .../InstCombine/InstructionCombining.cpp |   2 +-
 llvm/lib/Transforms/Utils/VNCoercion.cpp  |  63 ++--
 llvm/test/Transforms/GVN/vscale.ll        | 344 +++++++++++++++++++++
 3 files changed, 380 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/Transforms/GVN/vscale.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 687ac25..4f86cdd 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1902,7 +1902,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // If the element type has zero size then any index over it is equivalent
       // to an index of zero, so replace it with zero if it is not zero already.
       Type *EltTy = GTI.getIndexedType();
-      if (EltTy->isSized() && DL.getTypeAllocSize(EltTy) == 0)
+      if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
         if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
           *I = Constant::getNullValue(NewIndexType);
           MadeChange = true;
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 23fb770..0c05c29 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -10,6 +10,11 @@
 namespace llvm {
 namespace VNCoercion {
 
+static bool isFirstClassAggregateOrScalableType(Type *Ty) {
+  return Ty->isStructTy() || Ty->isArrayTy() ||
+         (Ty->isVectorTy() && Ty->getVectorIsScalable());
+}
+
 /// Return true if coerceAvailableValueToLoadType will succeed.
 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                      const DataLayout &DL) {
@@ -17,20 +22,20 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
   if (StoredTy == LoadTy)
     return true;
 
-  // If the loaded or stored value is an first class array or struct, don't try
-  // to transform them. We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
-      StoredTy->isArrayTy())
+  // If the loaded/stored value is a first class array/struct, or scalable type,
+  // don't try to transform them. We need to be able to bitcast to integer.
+  if (isFirstClassAggregateOrScalableType(LoadTy) ||
+      isFirstClassAggregateOrScalableType(StoredTy))
     return false;
 
-  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
 
   // The store size must be byte-aligned to support future type casts.
   if (llvm::alignTo(StoreSize, 8) != StoreSize)
     return false;
 
   // The store has to be at least as big as the load.
-  if (StoreSize < DL.getTypeSizeInBits(LoadTy))
+  if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
     return false;
 
   // Don't coerce non-integral pointers to integers or vice versa.
@@ -59,8 +64,8 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
   // If this is already the right type, just return it.
   Type *StoredValTy = StoredVal->getType();
 
-  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
-  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+  uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
+  uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
 
   // If the store and reload are the same size, we can always reuse it.
   if (StoredValSize == LoadedValSize) {
@@ -112,8 +117,8 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
   // If this is a big-endian system, we need to shift the value down to the low
   // bits so that a truncate will work.
   if (DL.isBigEndian()) {
-    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
-                        DL.getTypeStoreSizeInBits(LoadedTy);
+    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() -
+                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize();
     StoredVal = Helper.CreateLShr(
         StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
   }
@@ -161,9 +166,9 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                           Value *WritePtr,
                                           uint64_t WriteSizeInBits,
                                           const DataLayout &DL) {
-  // If the loaded or stored value is a first class array or struct, don't try
-  // to transform them. We need to be able to bitcast to integer.
-  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+  // If the loaded/stored value is a first class array/struct, or scalable type,
+  // don't try to transform them. We need to be able to bitcast to integer.
+  if (isFirstClassAggregateOrScalableType(LoadTy))
     return -1;
 
   int64_t StoreOffset = 0, LoadOffset = 0;
@@ -181,7 +186,7 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
   // If the load and store don't overlap at all, the store doesn't provide
   // anything to the load. In this case, they really don't alias at all, AA
   // must have gotten confused.
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
 
   if ((WriteSizeInBits & 7) | (LoadSize & 7))
     return -1;
@@ -215,10 +220,9 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
 int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                    StoreInst *DepSI, const DataLayout &DL) {
   auto *StoredVal = DepSI->getValueOperand();
-
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (StoredVal->getType()->isStructTy() ||
-      StoredVal->getType()->isArrayTy())
+
+  // Cannot handle reading from store of first-class aggregate or scalable type.
+  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
     return -1;
 
   // Don't coerce non-integral pointers to integers or vice versa.
@@ -232,7 +236,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
 
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
-      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
   return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                         DL);
 }
@@ -337,7 +341,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
     return -1;
 
   Value *DepPtr = DepLI->getPointerOperand();
-  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
   int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
   if (R != -1)
     return R;
@@ -347,7 +351,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
   int64_t LoadOffs = 0;
   const Value *LoadBase =
       GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
 
   unsigned Size =
       getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
@@ -437,8 +441,9 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
     return SrcVal;
   }
 
-  uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
-  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+  uint64_t StoreSize =
+      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8;
+  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8;
   // Compute which bits of the stored value are being used by the load. Convert
   // to an integer type to start with.
   if (SrcVal->getType()->isPtrOrPtrVectorTy())
@@ -490,8 +495,9 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
                            Instruction *InsertPt, const DataLayout &DL) {
   // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
   // widen SrcVal out to a larger load.
-  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned SrcValStoreSize =
+      DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
   if (Offset + LoadSize > SrcValStoreSize) {
     assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
     assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
@@ -534,8 +540,9 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
 
 Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
                                       Type *LoadTy, const DataLayout &DL) {
-  unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
-  unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+  unsigned SrcValStoreSize =
+      DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
+  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
   if (Offset + LoadSize > SrcValStoreSize)
     return nullptr;
   return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
@@ -546,7 +553,7 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
                                 Type *LoadTy, HelperClass &Helper,
                                 const DataLayout &DL) {
   LLVMContext &Ctx = LoadTy->getContext();
-  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
 
   // We know that this method is only called when the mem transfer fully
   // provides the bits for the load.
diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll
new file mode 100644
index 0000000..da98fd8
--- /dev/null
+++ b/llvm/test/Transforms/GVN/vscale.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -basicaa -gvn -dce | FileCheck %s
+
+; Analyze Load from clobbering Load.
+
+define <vscale x 4 x i32> @load_store_clobber_load(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @load_store_clobber_load(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* undef
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @load_store_clobber_load_mayalias(<vscale x 4 x i32>* %p, <vscale x 4 x i32>* %p2) {
+; CHECK-LABEL: @load_store_clobber_load_mayalias(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %sub = sub <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @load_store_clobber_load_noalias(<vscale x 4 x i32>* noalias %p, <vscale x 4 x i32>* noalias %p2) {
+; CHECK-LABEL: @load_store_clobber_load_noalias(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P2:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p2
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load to be eliminated
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
+define i32 @load_clobber_load_gep1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep1(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 0, i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 1
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 1
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
+  %gep2 = getelementptr i32, i32* %p2, i64 1
+  %load2 = load i32, i32* %gep2 ; <- load could be eliminated
+  %add = add i32 %load1, %load2
+  ret i32 %add
+}
+
+define i32 @load_clobber_load_gep2(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep2(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to i32*
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, i32* [[P2]], i64 4
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, i32* [[GEP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to i32*
+  %gep2 = getelementptr i32, i32* %p2, i64 4
+  %load2 = load i32, i32* %gep2 ; <- cannot determine at compile time whether %load1 and %load2 use the same address
+  %add = add i32 %load1, %load2
+  ret i32 %add
+}
+
+; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to MustAlias.
+define i32 @load_clobber_load_gep3(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_gep3(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    [[P2:%.*]] = bitcast <vscale x 4 x i32>* [[P]] to <vscale x 4 x float>*
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[P2]], i64 1, i64 0
+; CHECK-NEXT:    [[LOAD2:%.*]] = load float, float* [[GEP2]]
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast float [[LOAD2]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[LOAD1]], [[CAST]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  %load1 = load i32, i32* %gep1
+  %p2 = bitcast <vscale x 4 x i32>* %p to <vscale x 4 x float>*
+  %gep2 = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %p2, i64 1, i64 0
+  %load2 = load float, float* %gep2 ; <- load could be eliminated
+  %cast = bitcast float %load2 to i32
+  %add = add i32 %load1, %cast
+  ret i32 %add
+}
+
+define <vscale x 4 x i32> @load_clobber_load_fence(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_fence(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm "", "~{memory}"()
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  call void asm "", "~{memory}"()
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %sub = sub <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @load_clobber_load_sideeffect(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @load_clobber_load_sideeffect(
+; CHECK-NEXT:    [[LOAD1:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm sideeffect "", ""()
+; CHECK-NEXT:    [[LOAD2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[LOAD1]], [[LOAD2]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %load1 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  call void asm sideeffect "", ""()
+  %load2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  %add = add <vscale x 4 x i32> %load1, %load2
+  ret <vscale x 4 x i32> %add
+}
+
+; Analyze Load from clobbering Store.
+
+define <vscale x 4 x i32> @store_forward_to_load(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @store_forward_to_load(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> zeroinitializer
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x i32> @store_forward_to_load_sideeffect(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @store_forward_to_load_sideeffect(
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    call void asm sideeffect "", ""()
+; CHECK-NEXT:    [[LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[LOAD]]
+;
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  call void asm sideeffect "", ""()
+  %load = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p
+  ret <vscale x 4 x i32> %load
+}
+
+define i32 @store_clobber_load() {
+; CHECK-LABEL: @store_clobber_load(
+; CHECK-NEXT:    [[ALLOC:%.*]] = alloca <vscale x 4 x i32>
+; CHECK-NEXT:    store <vscale x 4 x i32> undef, <vscale x 4 x i32>* [[ALLOC]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[ALLOC]], i32 0, i32 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[PTR]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %alloc = alloca <vscale x 4 x i32>
+  store <vscale x 4 x i32> undef, <vscale x 4 x i32>* %alloc
+  %ptr = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %alloc, i32 0, i32 1
+  %load = load i32, i32* %ptr
+  ret i32 %load
+}
+
+; Analyze Load from clobbering MemInst.
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+
+define i32 @memset_clobber_load(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @memset_clobber_load(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    ret i32 16843009
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 0, i64 5
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+define i32 @memset_clobber_load_vscaled_base(<vscale x 4 x i32> *%p) {
+; CHECK-LABEL: @memset_clobber_load_vscaled_base(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 1
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+define i32 @memset_clobber_load_nonconst_index(<vscale x 4 x i32> *%p, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @memset_clobber_load_nonconst_index(
+; CHECK-NEXT:    [[CONV:%.*]] = bitcast <vscale x 4 x i32>* [[P:%.*]] to i8*
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[CONV]], i8 1, i64 200, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %conv = bitcast <vscale x 4 x i32>* %p to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i1 false)
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 %idx1, i64 %idx2
+  %load = load i32, i32* %gep
+  ret i32 %load
+}
+
+
+; Load elimination across BBs
+
+define <vscale x 4 x i32>* @load_from_alloc_replaced_with_undef() {
+; CHECK-LABEL: @load_from_alloc_replaced_with_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>
+; CHECK-NEXT:    br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[A]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret <vscale x 4 x i32>* [[A]]
+;
+entry:
+  %a = alloca <vscale x 4 x i32>
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i64 0, i64 1
+  %load = load i32, i32* %gep ; <- load to be eliminated
+  %tobool = icmp eq i32 %load, 0 ; <- icmp to be eliminated
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %a
+  br label %if.end
+
+if.end:
+  ret <vscale x 4 x i32>* %a
+}
+
+define i32 @redundant_load_elimination_1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @redundant_load_elimination_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[GEP]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i32 [[LOAD1]]
+;
+entry:
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  %load1 = load i32, i32* %gep
+  %cmp = icmp eq i32 %load1, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %load2 = load i32, i32* %gep ; <- load to be eliminated
+  %add = add i32 %load1, %load2
+  br label %if.end
+
+if.end:
+  %result = phi i32 [ %add, %if.then ], [ %load1, %entry ]
+  ret i32 %result
+}
+
+; TODO: BasicAA returns MayAlias for %gep1 and %gep2; this could be improved to NoAlias.
+define void @redundant_load_elimination_2(i1 %c, <vscale x 4 x i32>* %p, i32* %q, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @redundant_load_elimination_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    store i32 0, i32* [[GEP1]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1, i64 0
+; CHECK-NEXT:    store i32 1, i32* [[GEP2]]
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[T:%.*]] = load i32, i32* [[GEP1]]
+; CHECK-NEXT:    store i32 [[T]], i32* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+; CHECK:       if.else:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %gep1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 1
+  store i32 0, i32* %gep1
+  %gep2 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1, i64 0
+  store i32 1, i32* %gep2
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32, i32* %gep1 ; <- load could be eliminated
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  ret void
+}
+
+; TODO: the load in if.then could have been eliminated.
+define void @missing_load_elimination(i1 %c, <vscale x 4 x i32>* %p, <vscale x 4 x i32>* %q, <vscale x 4 x i32> %v) {
+; CHECK-LABEL: @missing_load_elimination(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* [[P:%.*]]
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]], i64 1
+; CHECK-NEXT:    store <vscale x 4 x i32> [[V:%.*]], <vscale x 4 x i32>* [[P1]]
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[T:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[P]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[T]], <vscale x 4 x i32>* [[Q:%.*]]
+; CHECK-NEXT:    ret void
+; CHECK:       if.else:
+; CHECK-NEXT:    ret void
+;
+entry:
+  store <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32>* %p
+  %p1 = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
+  store <vscale x 4 x i32> %v, <vscale x 4 x i32>* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load <vscale x 4 x i32>, <vscale x 4 x i32>* %p ; <- load could be eliminated
+  store <vscale x 4 x i32> %t, <vscale x 4 x i32>* %q
+  ret void
+
+if.else:
+  ret void
+}
-- 
2.7.4
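A minimal sketch of the guard pattern the patch applies throughout VNCoercion.cpp, using only the APIs visible above; the helper name getFixedSizeInBitsOrBail is hypothetical and for illustration only. The idea: any analysis that needs one concrete size must first reject scalable vectors, because TypeSize::getFixedSize() is only meaningful when the size does not depend on vscale.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include <cstdint>

using namespace llvm;

// Illustrative helper (not part of the patch): returns the size in bits as a
// plain integer, or -1 when the type is a scalable vector and no single fixed
// size exists at compile time.
static int64_t getFixedSizeInBitsOrBail(Type *Ty, const DataLayout &DL) {
  // A scalable vector's size is a runtime multiple of vscale; bail out here,
  // mirroring the isFirstClassAggregateOrScalableType() checks added above.
  if (Ty->isVectorTy() && Ty->getVectorIsScalable())
    return -1;
  // For every other sized type the TypeSize is fixed, so getFixedSize() is
  // the explicit way to obtain the underlying integer value.
  return DL.getTypeSizeInBits(Ty).getFixedSize();
}

A caller would branch on the -1 result exactly where the patch returns -1 or false, leaving the coercion logic unchanged for fixed-size types.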