From d889e17eca8e8c43c6e4a2439fae5a1a40823623 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 31 Jul 2020 11:10:00 +0700 Subject: [PATCH] [SimpleLoopUnswitch] Drop make.implicit metadata in case of non-trivial unswitching Non-trivial unswitching simply moves terminator being unswitch from the loop up to the switch block. It also preserves all metadata that was there. It might not be a correct thing to do for `make.implicit` metadata. Consider case: ``` for (...) { cond = // computed in loop if (cond) return X; if (p == null) throw_npe(); !make implicit } ``` Before the unswitching, if `p` is null and we reach this check, we are guaranteed to go to `throw_npe()` block. Now we unswitch on `p == null` condition: ``` if (p == null) !make implicit { for (...) { if (cond) return X; throw_npe() } } else { for (...) { if (cond) return X; } } ``` Now, following `true` branch of `p == null` does not always lead us to `throw_npe()` because the loop has side exit. Now, if we run ImplicitNullCheck pass on this code, it may end up making the unswitch condition implicit. This may lead us to turning normal path to `return X` into signal-throwing path, which is not efficient. Note that this does not happen during trivial unswitch: it guarantees that we do not have side exits before condition being unswitched. This patch fixes this situation by unconditional dropping of `make.implicit` metadata when we perform non-trivial unswitch. We could preserve it if we could prove that the condition always executes. This can be done as a follow-up. Differential Revision: https://reviews.llvm.org/D84916 Reviewed By: asbirlea --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 7 + .../SimpleLoopUnswitch/implicit-null-checks.ll | 211 +++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/implicit-null-checks.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index c7d23c9..c99fd63 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2070,6 +2070,13 @@ static void unswitchNontrivialInvariants( DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU); } + // Drop metadata if we may break its semantics by moving this branch into the + // split block. + // TODO: We can keep make.implicit metadata if we prove that TI always + // executes and we cannot leave unswitched loop before getting to null check + // block. See @test_may_keep_make_implicit_non_trivial. + TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); + // The stitching of the branched code back together depends on whether we're // doing full unswitching or not with the exception that we always want to // nuke the initial terminator placed in the split block. diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/implicit-null-checks.ll b/llvm/test/Transforms/SimpleLoopUnswitch/implicit-null-checks.ll new file mode 100644 index 0000000..32834d2 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/implicit-null-checks.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -enable-nontrivial-unswitch=true -simple-loop-unswitch -S < %s | FileCheck %s +; RUN: opt -enable-nontrivial-unswitch=true -passes='loop(unswitch),verify' -S < %s | FileCheck %s + +declare void @throw_npe() + +; It is illegal to preserve make_implicit notion of the condition being +; unswitched because we may exit loop before we reach the condition, so +; there is no guarantee that following implicit branch always means getting +; to throw_npe block. +define i32 @test_should_drop_make_implicit(i32* %p1, i32* %p2) { +; CHECK-LABEL: @test_should_drop_make_implicit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp eq i32* [[P2:%.*]], null +; CHECK-NOT: !make.implicit +; CHECK-NEXT: br i1 [[NULL_CHECK]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ] +; CHECK-NEXT: [[X_US:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[SIDE_EXIT_COND_US:%.*]] = icmp eq i32 [[X_US]], 0 +; CHECK-NEXT: br i1 [[SIDE_EXIT_COND_US]], label [[SIDE_EXIT_SPLIT_US:%.*]], label [[NULL_CHECK_BLOCK_US:%.*]] +; CHECK: null_check_block.us: +; CHECK-NEXT: br label [[THROW_NPE_SPLIT_US:%.*]] +; CHECK: side_exit.split.us: +; CHECK-NEXT: br label [[SIDE_EXIT:%.*]] +; CHECK: throw_npe.split.us: +; CHECK-NEXT: br label [[THROW_NPE:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[SIDE_EXIT_COND:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[SIDE_EXIT_COND]], label [[SIDE_EXIT_SPLIT:%.*]], label [[NULL_CHECK_BLOCK:%.*]] +; CHECK: null_check_block: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 10000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: side_exit.split: +; CHECK-NEXT: br label [[SIDE_EXIT]] +; CHECK: side_exit: +; CHECK-NEXT: ret i32 0 +; CHECK: throw_npe: +; CHECK-NEXT: call void @throw_npe() +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[X_LCSSA2:%.*]] = phi i32 [ [[X]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[X_LCSSA2]] +; +entry: + %null_check = icmp eq i32* %p2, null + br label %loop +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %x = load i32, i32* %p1 + %side_exit_cond = icmp eq i32 %x, 0 + br i1 %side_exit_cond, label %side_exit, label %null_check_block + +null_check_block: + br i1 %null_check, label %throw_npe, label %backedge, !make.implicit !0 + +backedge: + %iv.next = add i32 %iv,1 + %loop_cond = icmp slt i32 %iv.next, 10000 + br i1 %loop_cond, label %loop, label %exit + +side_exit: + ret i32 0 + +throw_npe: + call void @throw_npe() + unreachable + +exit: + ret i32 %x +} + +; Here make.implicit notion may be preserved because we always get to throw_npe +; after following true branch. This is a trivial unswitch. +define i32 @test_may_keep_make_implicit_trivial(i32* %p1, i32* %p2) { +; CHECK-LABEL: @test_may_keep_make_implicit_trivial( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp eq i32* [[P2:%.*]], null +; CHECK-NEXT: br i1 [[NULL_CHECK]], label [[THROW_NPE:%.*]], label [[ENTRY_SPLIT:%.*]], !make.implicit !0 +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[SIDE_EXIT_COND:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br label [[SIDE_EXIT_BLOCK:%.*]] +; CHECK: side_exit_block: +; CHECK-NEXT: br i1 [[SIDE_EXIT_COND]], label [[SIDE_EXIT:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 10000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: side_exit: +; CHECK-NEXT: ret i32 0 +; CHECK: throw_npe: +; CHECK-NEXT: call void @throw_npe() +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[X_LCSSA2:%.*]] = phi i32 [ [[X]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[X_LCSSA2]] +; +entry: + %null_check = icmp eq i32* %p2, null + br label %loop +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %x = load i32, i32* %p1 + %side_exit_cond = icmp eq i32 %x, 0 + br i1 %null_check, label %throw_npe, label %side_exit_block, !make.implicit !0 + +side_exit_block: + br i1 %side_exit_cond, label %side_exit, label %backedge + +backedge: + %iv.next = add i32 %iv,1 + %loop_cond = icmp slt i32 %iv.next, 10000 + br i1 %loop_cond, label %loop, label %exit + +side_exit: + ret i32 0 + +throw_npe: + call void @throw_npe() + unreachable + +exit: + ret i32 %x +} + +; TODO: This is a non-trivial unswitch, but it would still be legal to keep +; !make.implicit in entry block. Currently we do not have enough analysis to +; prove it. +define i32 @test_may_keep_make_implicit_non_trivial(i32* %p1, i32* %p2) { +; CHECK-LABEL: @test_may_keep_make_implicit_non_trivial( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NULL_CHECK:%.*]] = icmp eq i32* [[P2:%.*]], null +; CHECK-NOT: !make.implicit +; CHECK-NEXT: br i1 [[NULL_CHECK]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]] +; CHECK: entry.split.us: +; CHECK-NEXT: br label [[LOOP_US:%.*]] +; CHECK: loop.us: +; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ] +; CHECK-NEXT: [[X_US:%.*]] = load i32, i32* [[P1:%.*]], align 4 +; CHECK-NEXT: [[INNER_BLOCK_COND_US:%.*]] = icmp eq i32 [[X_US]], 0 +; CHECK-NEXT: br i1 [[INNER_BLOCK_COND_US]], label [[INNER_BLOCK_US:%.*]], label [[NULL_CHECK_BLOCK_US:%.*]] +; CHECK: inner_block.us: +; CHECK-NEXT: br label [[NULL_CHECK_BLOCK_US]] +; CHECK: null_check_block.us: +; CHECK-NEXT: br label [[THROW_NPE_SPLIT_US:%.*]] +; CHECK: throw_npe.split.us: +; CHECK-NEXT: br label [[THROW_NPE:%.*]] +; CHECK: entry.split: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P1]], align 4 +; CHECK-NEXT: [[INNER_BLOCK_COND:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[INNER_BLOCK_COND]], label [[INNER_BLOCK:%.*]], label [[NULL_CHECK_BLOCK:%.*]] +; CHECK: inner_block: +; CHECK-NEXT: br label [[NULL_CHECK_BLOCK]] +; CHECK: null_check_block: +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 10000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: throw_npe: +; CHECK-NEXT: call void @throw_npe() +; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: [[X_LCSSA1:%.*]] = phi i32 [ [[X]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[X_LCSSA1]] +; +entry: + %null_check = icmp eq i32* %p2, null + br label %loop +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %x = load i32, i32* %p1 + %inner_block_cond = icmp eq i32 %x, 0 + br i1 %inner_block_cond, label %inner_block, label %null_check_block + +inner_block: + br label %null_check_block + +null_check_block: + br i1 %null_check, label %throw_npe, label %backedge, !make.implicit !0 + +backedge: + %iv.next = add i32 %iv,1 + %loop_cond = icmp slt i32 %iv.next, 10000 + br i1 %loop_cond, label %loop, label %exit + +throw_npe: + call void @throw_npe() + unreachable + +exit: + ret i32 %x +} + +!0 = !{} -- 2.7.4