From fc0e245464bc29d0025e02471d5f1ae6b74ac19c Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Tue, 25 Jul 2017 23:37:17 +0000 Subject: [PATCH] Disable loop unswitching for some patterns containing equality comparison with undef. This is a workaround for the bug described in PR31652 and http://lists.llvm.org/pipermail/llvm-dev/2017-July/115497.html. The temporary solution is to add a function EqualityPropUnSafe. In EqualityPropUnSafe, for some simple patterns we can know the equality comparison may contains undef, so we regard such comparison as unsafe and will not do loop-unswitching for them. We also need to disable the select simplification when one of select operand is undef and its result feeds into equality comparison. The patch cannot clear the safety issue caused by the bug, but it can suppress the issue from happening to some extent. Differential Revision: https://reviews.llvm.org/D35811 llvm-svn: 309059 --- .../Transforms/InstCombine/InstCombineSelect.cpp | 16 +++ llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 51 ++++++++- .../LoopUnswitch/unswitch-equality-undef.ll | 121 +++++++++++++++++++++ 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 4eebe82..2f700cb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1157,6 +1157,22 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); + // FIXME: Remove this workaround when freeze related patches are done. + // For select with undef operand which feeds into an equality comparison, + // don't simplify it so loop unswitch can know the equality comparison + // may have an undef operand. This is a workaround for PR31652 caused by + // descrepancy about branch on undef between LoopUnswitch and GVN. + if (isa(TrueVal) || isa(FalseVal)) { + if (any_of(SI.users(), [&](User *U) { + ICmpInst *CI = dyn_cast(U); + if (CI && CI->isEquality()) + return true; + return false; + })) { + return nullptr; + } + } + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index d0c96fa..e434b86 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -553,6 +553,48 @@ bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) { return false; } +/// FIXME: Remove this workaround when freeze related patches are done. +/// LoopUnswitch and Equality propagation in GVN have discrepancy about +/// whether branch on undef/poison has undefine behavior. Here it is to +/// rule out some common cases that we found such discrepancy already +/// causing problems. Detail could be found in PR31652. Note if the +/// func returns true, it is unsafe. But if it is false, it doesn't mean +/// it is necessarily safe. +static bool EqualityPropUnSafe(Value &LoopCond) { + ICmpInst *CI = dyn_cast(&LoopCond); + if (!CI || !CI->isEquality()) + return false; + + Value *LHS = CI->getOperand(0); + Value *RHS = CI->getOperand(1); + if (isa(LHS) || isa(RHS)) + return true; + + auto hasUndefInPHI = [](PHINode &PN) { + for (Value *Opd : PN.incoming_values()) { + if (isa(Opd)) + return true; + } + return false; + }; + PHINode *LPHI = dyn_cast(LHS); + PHINode *RPHI = dyn_cast(RHS); + if ((LPHI && hasUndefInPHI(*LPHI)) || (RPHI && hasUndefInPHI(*RPHI))) + return true; + + auto hasUndefInSelect = [](SelectInst &SI) { + if (isa(SI.getTrueValue()) || + isa(SI.getFalseValue())) + return true; + return false; + }; + SelectInst *LSI = dyn_cast(LHS); + SelectInst *RSI = dyn_cast(RHS); + if ((LSI && hasUndefInSelect(*LSI)) || (RSI && hasUndefInSelect(*RSI))) + return true; + return false; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -666,8 +708,10 @@ bool LoopUnswitch::processCurrentLoop() { // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed).first; - if (LoopCond && - UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { + if (!LoopCond || EqualityPropUnSafe(*LoopCond)) + continue; + + if (UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; return true; } @@ -1035,6 +1079,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) { if (!LoopExitBB || isa(LoopExitBB->begin())) return false; // Can't handle this. + if (EqualityPropUnSafe(*LoopCond)) + return false; + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, CurrentTerm); ++NumBranches; diff --git a/llvm/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll b/llvm/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll new file mode 100644 index 0000000..d77de7b --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll @@ -0,0 +1,121 @@ +; RUN: opt < %s -instcombine -licm -loop-unswitch -loop-unswitch-threshold=1000 -disable-output -stats 2>&1| FileCheck %s +; Check no loop unswitch is done because unswitching of equality expr with +; undef is unsafe before the freeze patch is committed. +; CHECK-NOT: Number of branches unswitched + +define void @ham(i64 %arg) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + br i1 %tmp, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp2 = load volatile i64, i64* @global, align 8 + br label %bb3 + +bb3: ; preds = %bb1, %bb + %tmp4 = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ] + %tmp5 = load i64, i64* @global.1, align 8 + br label %bb6 + +bb6: ; preds = %bb21, %bb3 + %tmp7 = phi i64 [ 3, %bb21 ], [ %tmp5, %bb3 ] + %tmp8 = phi i64 [ %tmp25, %bb21 ], [ 0, %bb3 ] + %tmp9 = icmp eq i64 %tmp7, %arg + br i1 %tmp9, label %bb10, label %bb28 + +bb10: ; preds = %bb6 + %tmp11 = icmp eq i64 %tmp7, 0 + br i1 %tmp11, label %bb21, label %bb12 + +bb12: ; preds = %bb10 + %tmp13 = load i64, i64* @global.2, align 8 + %tmp14 = add nsw i64 %tmp13, 1 + store i64 %tmp14, i64* @global.2, align 8 + %tmp15 = load i64, i64* @global.3, align 8 + %tmp16 = icmp eq i64 %tmp15, %tmp4 + br i1 %tmp16, label %bb17, label %bb21 + +bb17: ; preds = %bb12 + %tmp18 = phi i64 [ %tmp15, %bb12 ] + %tmp19 = load i64, i64* @global.4, align 8 + %tmp20 = add nsw i64 %tmp19, %tmp18 + store i64 %tmp20, i64* @global.5, align 8 + br label %bb29 + +bb21: ; preds = %bb12, %bb10 + %tmp22 = load i64, i64* @global.3, align 8 + %tmp23 = load volatile i64, i64* @global, align 8 + %tmp24 = add nsw i64 %tmp23, %tmp22 + store i64 %tmp24, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp25 = add nsw i64 %tmp8, 1 + %tmp26 = load i64, i64* @global.6, align 8 + %tmp27 = icmp slt i64 %tmp25, %tmp26 + br i1 %tmp27, label %bb6, label %bb28 + +bb28: ; preds = %bb21, %bb6 + br label %bb29 + +bb29: ; preds = %bb28, %bb17 + ret void +} + +define void @zot(i64 %arg, i64 %arg1) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + %tmp2 = select i1 %tmp, i64 %arg1, i64 undef + %tmp3 = load i64, i64* @global.1, align 8 + br label %bb4 + +bb4: ; preds = %bb19, %bb + %tmp5 = phi i64 [ 3, %bb19 ], [ %tmp3, %bb ] + %tmp6 = phi i64 [ %tmp23, %bb19 ], [ 0, %bb ] + %tmp7 = icmp eq i64 %tmp5, %arg + br i1 %tmp7, label %bb8, label %bb26 + +bb8: ; preds = %bb4 + %tmp9 = icmp eq i64 %tmp5, 0 + br i1 %tmp9, label %bb19, label %bb10 + +bb10: ; preds = %bb8 + %tmp11 = load i64, i64* @global.2, align 8 + %tmp12 = add nsw i64 %tmp11, 1 + store i64 %tmp12, i64* @global.2, align 8 + %tmp13 = load i64, i64* @global.3, align 8 + %tmp14 = icmp eq i64 %tmp13, %tmp2 + br i1 %tmp14, label %bb15, label %bb19 + +bb15: ; preds = %bb10 + %tmp16 = phi i64 [ %tmp13, %bb10 ] + %tmp17 = load i64, i64* @global.4, align 8 + %tmp18 = add nsw i64 %tmp17, %tmp16 + store i64 %tmp18, i64* @global.5, align 8 + br label %bb27 + +bb19: ; preds = %bb10, %bb8 + %tmp20 = load i64, i64* @global.3, align 8 + %tmp21 = load volatile i64, i64* @global, align 8 + %tmp22 = add nsw i64 %tmp21, %tmp20 + store i64 %tmp22, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp23 = add nsw i64 %tmp6, 1 + %tmp24 = load i64, i64* @global.6, align 8 + %tmp25 = icmp slt i64 %tmp23, %tmp24 + br i1 %tmp25, label %bb4, label %bb26 + +bb26: ; preds = %bb19, %bb4 + br label %bb27 + +bb27: ; preds = %bb26, %bb15 + ret void +} + +@global = common global i64 0, align 8 +@global.1 = common global i64 0, align 8 +@global.2 = common global i64 0, align 8 +@global.3 = common global i64 0, align 8 +@global.4 = common global i64 0, align 8 +@global.5 = common global i64 0, align 8 +@global.6 = common global i64 0, align 8 + + -- 2.7.4