From e9393789a9fa95ea1d7df71aa8f164f043d7da33 Mon Sep 17 00:00:00 2001 From: bipmis Date: Tue, 29 Nov 2022 10:53:51 +0000 Subject: [PATCH] [AggressiveInstCombine] Handle the insert point of the merged load correctly. This patch updates the insert point of the merged load in AggressiveInstCombine. It fixes the reported test failures by using alias analysis to check for stores between the loads being merged. Differential Revision: https://reviews.llvm.org/D137201 --- .../AggressiveInstCombine.cpp | 46 +++-- .../AggressiveInstCombine/AArch64/or-load.ll | 136 +++++++------ .../AggressiveInstCombine/X86/or-load.ll | 218 ++++++++++++--------- 3 files changed, 234 insertions(+), 166 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index ed5dba0..5da7dbb 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -610,6 +610,7 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) { /// shift amount, zero extend type and loadSize. struct LoadOps { LoadInst *Root = nullptr; + LoadInst *RootInsert = nullptr; bool FoundRoot = false; uint64_t LoadSize = 0; Value *Shift = nullptr; @@ -675,16 +676,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2, /* AllowNonInbounds */ true); - // Make sure Load with lower Offset is at LI1 - bool Reverse = false; - if (Offset2.slt(Offset1)) { - std::swap(LI1, LI2); - std::swap(ShAmt1, ShAmt2); - std::swap(Offset1, Offset2); - std::swap(Load1Ptr, Load2Ptr); - Reverse = true; - } - // Verify if both loads have same base pointers and load sizes are same. 
uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits(); uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits(); @@ -695,20 +686,36 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1)) return false; - // TODO: Alias Analysis to check for stores b/w the loads. - // Currently bail out if there are stores b/w the loads. - LoadInst *Start = LI1, *End = LI2; - if (!LI1->comesBefore(LI2)) + // Alias Analysis to check for stores b/w the loads. + LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2; + MemoryLocation Loc; + if (!Start->comesBefore(End)) { std::swap(Start, End); + Loc = MemoryLocation::get(End); + if (LOps.FoundRoot) + Loc = Loc.getWithNewSize(LOps.LoadSize); + } else + Loc = MemoryLocation::get(End); unsigned NumScanned = 0; for (Instruction &Inst : make_range(Start->getIterator(), End->getIterator())) { - if (Inst.mayWriteToMemory()) + if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) return false; if (++NumScanned > MaxInstrsToScan) return false; } + // Make sure Load with lower Offset is at LI1 + bool Reverse = false; + if (Offset2.slt(Offset1)) { + std::swap(LI1, LI2); + std::swap(ShAmt1, ShAmt2); + std::swap(Offset1, Offset2); + std::swap(Load1Ptr, Load2Ptr); + std::swap(LoadSize1, LoadSize2); + Reverse = true; + } + // Big endian swap the shifts if (IsBigEndian) std::swap(ShAmt1, ShAmt2); @@ -746,6 +753,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AATags1 = LI1->getAAMetadata(); } LOps.LoadSize = LoadSize1 + LoadSize2; + LOps.RootInsert = Start; // Concatenate the AATags of the Merged Loads. 
LOps.AATags = AATags1.concat(AATags2); @@ -781,9 +789,15 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, if (!Allowed || !Fast) return false; + // Make sure the Load pointer of type GEP/non-GEP is above insert point + Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand()); + if (Inst && Inst->getParent() == LI1->getParent() && + !Inst->comesBefore(LOps.RootInsert)) + Inst->moveBefore(LOps.RootInsert); + // New load can be generated Value *Load1Ptr = LI1->getPointerOperand(); - Builder.SetInsertPoint(LI1); + Builder.SetInsertPoint(LOps.RootInsert); Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS)); NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(), LI1->isVolatile(), ""); diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 75aa414..8087137 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -142,26 +142,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 
[[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -188,26 +193,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; 
ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1760,26 +1770,32 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert( -; ALL-NEXT: 
[[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; 
BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index 2c672f9..de61417 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -150,26 +150,31 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias( +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_alias( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], 
i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -196,26 +201,31 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_alias_BE( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: store i8 10, ptr [[P]], align 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; 
ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_alias_BE( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; LE-NEXT: store i8 10, ptr [[P]], align 1 +; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_alias_BE( +; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 +; BE-NEXT: store i8 10, ptr [[P]], align 1 +; BE-NEXT: ret i32 [[L1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1851,16 +1861,22 @@ define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) { } define i16 @loadCombine_2consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_2consecutive_badinsert( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 -; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] -; ALL-NEXT: ret i16 [[O1]] +; LE-LABEL: @loadCombine_2consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, 
ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i16 [[L1]] +; +; BE-LABEL: @loadCombine_2consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; BE-NEXT: ret i16 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l2 = load i8, ptr %p1 @@ -1874,26 +1890,32 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: store i8 0, ptr [[P1]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_badinsert( +; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 +; 
LE-NEXT: store i8 0, ptr [[P1]], align 1 +; LE-NEXT: ret i32 [[L1]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: store i8 0, ptr [[P1]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1920,26 +1942,42 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { -; ALL-LABEL: @loadCombine_4consecutive_badinsert2( -; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; ALL-NEXT: store i8 0, ptr [[P3]], align 1 -; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; ALL-NEXT: [[S2:%.*]] = shl i32 
[[E2]], 8 -; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; ALL-NEXT: ret i32 [[O3]] +; LE-LABEL: @loadCombine_4consecutive_badinsert2( +; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 +; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[L1]] to i32 +; LE-NEXT: store i8 0, ptr [[P3]], align 1 +; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; LE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; LE-NEXT: [[O2:%.*]] = or i32 [[TMP1]], [[S3]] +; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; LE-NEXT: ret i32 [[O3]] +; +; BE-LABEL: @loadCombine_4consecutive_badinsert2( +; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; BE-NEXT: store i8 0, ptr [[P3]], align 1 +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; BE-NEXT: ret i32 [[O3]] ; %p1 = 
getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 -- 2.7.4