From 3c90812f3b43fae3a4a8f0d0d6ec29824690431b Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Wed, 9 Mar 2022 10:33:33 -0800 Subject: [PATCH] [IROutliner] Avoid reusing PHINodes that have already been matched when merging outlined functions' phi node blocks When there are two external phi nodes for two different outlined regions, when compressing the created phi nodes between the two regions, the matching for the second phi node in the second region matches the first phi node created for the first region rather than the second phi node created for the first region. This adds an extra output path where there should not be one. The fix is the ignore phi nodes that have already been matched for each region. Reviewer: paquette Differential Revision: https://reviews.llvm.org/D121312 --- llvm/lib/Transforms/IPO/IROutliner.cpp | 20 +++- .../IROutliner/duplicate-merging-phis.ll | 117 +++++++++++++++++++++ 2 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index c355048..b4f4e01 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1559,11 +1559,14 @@ findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, /// \p PN in. /// \param OutputMappings [in] - The mapping of output values from outlined /// region to their original values. +/// \param UsedPhis [in, out] - The PHINodes in the block that have already been +/// matched. /// \return the newly found or created PHINode in \p OverallPhiBlock. static PHINode* findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, - const DenseMap &OutputMappings) { + const DenseMap &OutputMappings, + DenseSet &UsedPHIs) { OutlinableGroup &Group = *Region.Parent; DenseSet PNCanonNums; @@ -1579,14 +1582,20 @@ findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, // Find the Canonical Numbering for each PHINode, if it matches, we replace // the uses of the PHINode we are searching for, with the found PHINode. for (PHINode &CurrPN : OverallPhiBlock->phis()) { + // If this PHINode has already been matched to another PHINode to be merged, + // we skip it. + if (UsedPHIs.find(&CurrPN) != UsedPHIs.end()) + continue; + CurrentCanonNums.clear(); findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, /* ReplacedWithOutlinedCall = */ true); - if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { return CurrentCanonNums.contains(CanonNum); - })) + })) { + UsedPHIs.insert(&CurrPN); return &CurrPN; + } } // If we've made it here, it means we weren't able to replace the PHINode, so @@ -1646,6 +1655,7 @@ replaceArgumentUses(OutlinableRegion &Region, if (FirstFunction) DominatingFunction = Group.OutlinedFunction; DominatorTree DT(*DominatingFunction); + DenseSet UsedPHIs; for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); ArgIdx++) { @@ -1745,8 +1755,8 @@ replaceArgumentUses(OutlinableRegion &Region, // For our PHINode, we find the combined canonical numbering, and // attempt to find a matching PHINode in the overall PHIBlock. If we // cannot, we copy the PHINode and move it into this new block. - PHINode *NewPN = - findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); + PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, + OutputMappings, UsedPHIs); NewI->setOperand(0, NewPN); } diff --git a/llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll b/llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll new file mode 100644 index 0000000..76bef61 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Make sure that when we merge phi nodes, we do not merge two different PHINodes +; as the same phi node. + +define void @f1() { +bb1: + %0 = add i32 1, 2 + %1 = add i32 3, 4 + %2 = add i32 5, 6 + %3 = add i32 7, 8 + br label %bb5 +bb2: + %4 = mul i32 5, 4 + br label %bb5 + +placeholder: + %a = sub i32 5, 4 + ret void + +bb5: + %phinode = phi i32 [5, %bb1], [5, %bb2] + %phinode1 = phi i32 [5, %bb1], [5, %bb2] + ret void +} + +define void @f2() { +bb1: + %0 = add i32 1, 2 + %1 = add i32 3, 4 + %2 = add i32 5, 6 + %3 = add i32 7, 8 + br label %bb5 +bb2: + %4 = mul i32 5, 4 + br label %bb5 + +placeholder: + %a = sub i32 5, 4 + ret void + +bb5: + %phinode = phi i32 [5, %bb1], [5, %bb2] + %phinode1 = phi i32 [5, %bb1], [5, %bb2] + ret void +} +; CHECK-LABEL: @f1( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[PHINODE1_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[PHINODE1_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32* [[PHINODE1_CE_LOC]]) +; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4 +; CHECK-NEXT: [[PHINODE1_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE1_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BB5:%.*]], label [[BB1_AFTER_OUTLINE:%.*]] +; CHECK: bb1_after_outline: +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ] +; CHECK-NEXT: [[PHINODE1:%.*]] = phi i32 [ [[PHINODE1_CE_RELOAD]], [[BB1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @f2( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[PHINODE1_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[PHINODE1_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32* [[PHINODE1_CE_LOC]]) +; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4 +; CHECK-NEXT: [[PHINODE1_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE1_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BB5:%.*]], label [[BB1_AFTER_OUTLINE:%.*]] +; CHECK: bb1_after_outline: +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ] +; CHECK-NEXT: [[PHINODE1:%.*]] = phi i32 [ [[PHINODE1_CE_RELOAD]], [[BB1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BB1_TO_OUTLINE:%.*]] +; CHECK: bb1_to_outline: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 3, 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 5, 6 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 7, 8 +; CHECK-NEXT: br label [[BB5_SPLIT:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 5, 4 +; CHECK-NEXT: br label [[BB5_SPLIT]] +; CHECK: placeholder: +; CHECK-NEXT: [[A:%.*]] = sub i32 5, 4 +; CHECK-NEXT: br label [[BB1_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: bb5.split: +; CHECK-NEXT: [[PHINODE_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2:%.*]] ] +; CHECK-NEXT: [[PHINODE1_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2]] ] +; CHECK-NEXT: br label [[BB5_EXITSTUB:%.*]] +; CHECK: bb5.exitStub: +; CHECK-NEXT: store i32 [[PHINODE_CE]], i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 [[PHINODE1_CE]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: bb1_after_outline.exitStub: +; CHECK-NEXT: ret i1 false +; -- 2.7.4