From 5f0d0e60d11b8d2e48aacf31a82762280f9a8712 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 25 Aug 2016 11:55:47 +0000 Subject: [PATCH] GVN-hoist: fix hoistingFromAllPaths for loops (PR29034) It is invalid to hoist stores or loads if they are not executed on all paths from the hoisting point to the exit of the function. In the testcase, there are paths in the loop that do not execute the stores or the loads, and so hoisting them within the loop is unsafe. The problem is that the current implementation of hoistingFromAllPaths is incomplete: it walks all blocks dominated by the hoisting point, and does not return false when the loop contains a path on which the hoisted ld/st is not executed. Differential Revision: https://reviews.llvm.org/D23843 llvm-svn: 279732 --- llvm/lib/Transforms/Scalar/GVNHoist.cpp | 77 +++++++++++-------- llvm/test/Transforms/GVNHoist/pr29031.ll | 51 +++++++++++++ llvm/test/Transforms/GVNHoist/pr29034.ll | 122 +++++++++++++++++++++++++++++++ 3 files changed, 219 insertions(+), 31 deletions(-) create mode 100644 llvm/test/Transforms/GVNHoist/pr29031.ll create mode 100644 llvm/test/Transforms/GVNHoist/pr29034.ll diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index 53f76df..e287ba7 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -273,24 +273,32 @@ private: return false; } - // Return true when all paths from A to the end of the function pass through - // either B or C. - bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B, - const BasicBlock *C) { - // We fully copy the WL in order to be able to remove items from it. - SmallPtrSet WL; - WL.insert(B); - WL.insert(C); - - for (auto It = df_begin(A), E = df_end(A); It != E;) { - // There exists a path from A to the exit of the function if we are still - // iterating in DF traversal and we removed all instructions from the work - // list. - if (WL.empty()) + // Return true when a successor of BB dominates A. + bool successorDominate(const BasicBlock *BB, const BasicBlock *A) { + for (const BasicBlock *Succ : BB->getTerminator()->successors()) + if (DT->dominates(Succ, A)) + return true; + + return false; + } + + // Return true when all paths from HoistBB to the end of the function pass + // through one of the blocks in WL. + bool hoistingFromAllPaths(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL) { + + // Copy WL as the loop will remove elements from it. + SmallPtrSet WorkList(WL.begin(), WL.end()); + + for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { + // There exists a path from HoistBB to the exit of the function if we are + // still iterating in DF traversal and we removed all instructions from + // the work list. + if (WorkList.empty()) return false; const BasicBlock *BB = *It; - if (WL.erase(BB)) { + if (WorkList.erase(BB)) { // Stop DFS traversal when BB is in the work list. It.skipChildren(); continue; @@ -300,6 +308,11 @@ private: if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator())) return false; + // When reaching the back-edge of a loop, there may be a path through the + // loop that does not pass through B or C before exiting the loop. + if (successorDominate(BB, HoistBB)) + return false; + // Increment DFS traversal when not skipping children. ++It; } @@ -476,23 +489,21 @@ private: return true; } - // Return true when it is safe to hoist scalar instructions from BB1 and BB2 - // to HoistBB. - bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1, - const BasicBlock *BB2, int &NBBsOnAllPaths) { - // Check that the hoisted expression is needed on all paths. When HoistBB - // already contains an instruction to be hoisted, the expression is needed - // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist - // scalars to a place where they are partially needed. - if (!OptForMinSize && BB1 != HoistBB && - !hoistingFromAllPaths(HoistBB, BB1, BB2)) + // Return true when it is safe to hoist scalar instructions from all blocks in + // WL to HoistBB. + bool safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths) { + // Check that the hoisted expression is needed on all paths. Enable scalar + // hoisting at -Oz as it is safe to hoist scalars to a place where they are + // partially needed. + if (!OptForMinSize && !hoistingFromAllPaths(HoistBB, WL)) return false; - if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) || - hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths)) - return false; + for (const BasicBlock *BB : WL) + if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) + return false; - // Safe to hoist scalars from BB1 and BB2 to HoistBB. return true; } @@ -542,8 +553,12 @@ private: NewHoistPt = NewHoistBB->getTerminator(); } + SmallPtrSet WL; + WL.insert(HoistBB); + WL.insert(BB); + if (K == InsKind::Scalar) { - if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) { + if (safeToHoistScalar(NewHoistBB, WL, NBBsOnAllPaths)) { // Extend HoistPt to NewHoistPt. HoistPt = NewHoistPt; HoistBB = NewHoistBB; @@ -557,7 +572,7 @@ private: // loading from the same address: for instance there may be a branch on // which the address of the load may not be initialized. if ((HoistBB == NewHoistBB || BB == NewHoistBB || - hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) && + hoistingFromAllPaths(NewHoistBB, WL)) && // Also check that it is safe to move the load or store from HoistPt // to NewHoistPt, and from Insn to NewHoistPt. safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) && diff --git a/llvm/test/Transforms/GVNHoist/pr29031.ll b/llvm/test/Transforms/GVNHoist/pr29031.ll new file mode 100644 index 0000000..cd51872 --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/pr29031.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Check that the stores are not hoisted: it is invalid to hoist stores if they +; are not executed on all paths. In this testcase, there are paths in the loop +; that do not execute the stores. + +; CHECK-LABEL: define i32 @main +; CHECK: store +; CHECK: store +; CHECK: store + +@a = global i32 0, align 4 + +define i32 @main() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %0 = load i32, i32* @a, align 4 + %cmp = icmp slt i32 %0, 1 + br i1 %cmp, label %for.cond1, label %for.end7 + +for.cond1: ; preds = %for.cond, %for.inc + %1 = load i32, i32* @a, align 4 + %cmp2 = icmp slt i32 %1, 1 + br i1 %cmp2, label %for.body3, label %for.inc5 + +for.body3: ; preds = %for.cond1 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %if.then, label %for.inc + +if.then: ; preds = %for.body3 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @a, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3, %if.then + %2 = load i32, i32* @a, align 4 + %inc4 = add nsw i32 %2, 1 + store i32 %inc4, i32* @a, align 4 + br label %for.cond1 + +for.inc5: ; preds = %for.cond1 + %inc6 = add nsw i32 %1, 1 + store i32 %inc6, i32* @a, align 4 + br label %for.cond + +for.end7: ; preds = %for.cond + ret i32 %0 +} + diff --git a/llvm/test/Transforms/GVNHoist/pr29034.ll b/llvm/test/Transforms/GVNHoist/pr29034.ll new file mode 100644 index 0000000..5e725ad --- /dev/null +++ b/llvm/test/Transforms/GVNHoist/pr29034.ll @@ -0,0 +1,122 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Check that the stores are not hoisted: it is invalid to hoist stores if they +; are not executed on all paths. In this testcase, there are paths in the loop +; that do not execute the stores. + +; CHECK-LABEL: define void @music_task +; CHECK: store +; CHECK: store +; CHECK: store + + +%struct._MUSIC_OP_API_ = type { %struct._FILE_OPERATE_*, %struct.__MUSIC_API* } +%struct._FILE_OPERATE_ = type { %struct._FILE_OPERATE_INIT_*, %struct._lg_dev_info_* } +%struct._FILE_OPERATE_INIT_ = type { i32, i32, i32, i32, i32*, i8*, i32 } +%struct._lg_dev_info_ = type { %struct.os_event, i32, i32, %struct._lg_dev_hdl_*, i8, i8, i8, i8, i8 } +%struct.os_event = type { i8, i32, i8*, %union.anon } +%union.anon = type { %struct.event_cnt } +%struct.event_cnt = type { i16 } +%struct._lg_dev_hdl_ = type { i8*, i8*, i8*, i8*, i8* } +%struct.__MUSIC_API = type <{ i8*, i8*, i32, %struct._DEC_API, %struct._DEC_API_IO*, %struct._FS_BRK_POINT* }> +%struct._DEC_API = type { %struct._DEC_PHY*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %struct._AAC_DEFAULT_SETTING, i32, i32, i8*, %struct.decoder_inf*, i32, i8, i8*, i8, i8* } +%struct._DEC_PHY = type { i8*, %struct.__audio_decoder_ops*, i8*, %struct.if_decoder_io, %struct.if_dec_file*, i8*, i32 (i8*)*, i32, i8, %struct.__FF_FR } +%struct.__audio_decoder_ops = type { i8*, i32 (i8*, %struct.if_decoder_io*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %struct.decoder_inf* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* } +%struct.if_decoder_io = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* } +%struct.if_dec_file = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* } +%struct.__FF_FR = type { i32, i32, i8, i8, i8 } +%struct._AAC_DEFAULT_SETTING = type { i32, i32, i32 } +%struct.decoder_inf = type { i16, i16, i32, i32 } +%struct._DEC_API_IO = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%struct.decoder_inf*, i32)*, %struct.__OP_IO, i32, i32 } +%struct.__OP_IO = type { i8*, i8* (i8*, i8*, i32)* } +%struct._FS_BRK_POINT = type { %struct._FS_BRK_INFO, i32, i32 } +%struct._FS_BRK_INFO = type { i32, i32, [8 x i8], i8, i8, i16 } + +@.str = external hidden unnamed_addr constant [10 x i8], align 1 + +define void @music_task(i8* nocapture readnone %p) local_unnamed_addr { +entry: + %mapi = alloca %struct._MUSIC_OP_API_*, align 8 + %0 = bitcast %struct._MUSIC_OP_API_** %mapi to i8* + call void @llvm.lifetime.start(i64 8, i8* %0) + store %struct._MUSIC_OP_API_* null, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %call = call i32 @music_decoder_init(%struct._MUSIC_OP_API_** nonnull %mapi) + br label %while.cond + +while.cond.loopexit: ; preds = %while.cond2 + br label %while.cond + +while.cond: ; preds = %while.cond.loopexit, %entry + %1 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %dop_api = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %1, i64 0, i32 1 + %2 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api, align 8, !tbaa !5 + %file_num = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %2, i64 0, i32 2 + %3 = bitcast i32* %file_num to i8* + %call1 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %1, i32 33, i32 0, i32 28, i8* %3) + br label %while.cond2 + +while.cond2: ; preds = %while.cond2.backedge, %while.cond + %err.0 = phi i32 [ %call1, %while.cond ], [ %err.0.be, %while.cond2.backedge ] + switch i32 %err.0, label %sw.default [ + i32 0, label %while.cond.loopexit + i32 35, label %sw.bb + i32 11, label %sw.bb7 + i32 12, label %sw.bb13 + ] + +sw.bb: ; preds = %while.cond2 + %4 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %dop_api4 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %4, i64 0, i32 1 + %5 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api4, align 8, !tbaa !5 + %file_num5 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %5, i64 0, i32 2 + %6 = load i32, i32* %file_num5, align 1, !tbaa !7 + %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i32 %6) + br label %while.cond2.backedge + +sw.bb7: ; preds = %while.cond2 + %7 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %dop_api8 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %7, i64 0, i32 1 + %8 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api8, align 8, !tbaa !5 + %file_num9 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %8, i64 0, i32 2 + store i32 1, i32* %file_num9, align 1, !tbaa !7 + %9 = bitcast i32* %file_num9 to i8* + %call12 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %7, i32 34, i32 0, i32 24, i8* %9) + br label %while.cond2.backedge + +sw.bb13: ; preds = %while.cond2 + %10 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %dop_api14 = getelementptr inbounds %struct._MUSIC_OP_API_, %struct._MUSIC_OP_API_* %10, i64 0, i32 1 + %11 = load %struct.__MUSIC_API*, %struct.__MUSIC_API** %dop_api14, align 8, !tbaa !5 + %file_num15 = getelementptr inbounds %struct.__MUSIC_API, %struct.__MUSIC_API* %11, i64 0, i32 2 + store i32 1, i32* %file_num15, align 1, !tbaa !7 + %12 = bitcast i32* %file_num15 to i8* + %call18 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %10, i32 35, i32 0, i32 26, i8* %12) + br label %while.cond2.backedge + +sw.default: ; preds = %while.cond2 + %13 = load %struct._MUSIC_OP_API_*, %struct._MUSIC_OP_API_** %mapi, align 8, !tbaa !1 + %call19 = call i32 @music_play_api(%struct._MUSIC_OP_API_* %13, i32 33, i32 0, i32 22, i8* null) + br label %while.cond2.backedge + +while.cond2.backedge: ; preds = %sw.default, %sw.bb13, %sw.bb7, %sw.bb + %err.0.be = phi i32 [ %call19, %sw.default ], [ %call18, %sw.bb13 ], [ %call12, %sw.bb7 ], [ 0, %sw.bb ] + br label %while.cond2 +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare i32 @music_decoder_init(%struct._MUSIC_OP_API_**) +declare i32 @music_play_api(%struct._MUSIC_OP_API_*, i32, i32, i32, i8*) +declare i32 @printf(i8* nocapture readonly, ...) + +!0 = !{!"clang version 4.0.0 "} +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !2, i64 8} +!6 = !{!"_MUSIC_OP_API_", !2, i64 0, !2, i64 8} +!7 = !{!8, !9, i64 16} +!8 = !{!"__MUSIC_API", !2, i64 0, !2, i64 8, !9, i64 16, !10, i64 20, !2, i64 140, !2, i64 148} +!9 = !{!"int", !3, i64 0} +!10 = !{!"_DEC_API", !2, i64 0, !2, i64 8, !2, i64 16, !2, i64 24, !2, i64 32, !2, i64 40, !11, i64 48, !9, i64 60, !9, i64 64, !2, i64 72, !2, i64 80, !9, i64 88, !3, i64 92, !2, i64 96, !3, i64 104, !2, i64 112} +!11 = !{!"_AAC_DEFAULT_SETTING", !9, i64 0, !9, i64 4, !9, i64 8} -- 2.7.4