From 315aef667ec60675627ecd0fe1424f9c186766de Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Thu, 1 Sep 2022 15:29:40 +0200
Subject: [PATCH] [LICM] Fix thread safety checks for promotion of byval args

This code was relying on a very subtle contract: the expectation was
that for non-allocas, the unwind safety check would already perform a
capture check, so we don't need to perform it again later. This held
true while unwind safety was only handled for allocas and noalias
calls, but became incorrect when byval support was added.

To avoid this kind of issue, just remove the dependency between the
unwind and thread-safety checks entirely. At worst, this means we
perform a redundant capture check. If this should turn out to be
problematic for compile-time, we can cache that query in a more
explicit way.
---
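For context, here is roughly the source-level pattern the capture check
guards against. This is an illustrative C sketch only, not part of the
patch; the struct and function names are made up. A struct passed by
value is typically lowered to a byval pointer argument, and once its
address escapes, another thread may access the callee's copy while the
loop runs.

  struct Counter { int val; int pad[31]; }; /* big enough to be passed byval */

  void publish(struct Counter *p); /* assumed to hand the pointer to another
                                      thread, i.e. it captures the argument  */
  int want_update(int i);          /* some arbitrary condition               */

  void tick(struct Counter c, int n) {
    publish(&c);                   /* the byval copy escapes before the loop */
    for (int i = 0; i < n; ++i)
      if (want_update(i))
        c.val += 1;                /* conditional store: promoting it to a
                                      single store after the loop introduces
                                      a write on paths that never stored
                                      before, visible to the other thread    */
  }

Promotion therefore has to prove that the underlying object is not
captured before or in the loop, regardless of what the unwind-safety
check already concluded.
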
 llvm/lib/Transforms/Scalar/LICM.cpp          | 17 +++++------------
 llvm/test/Transforms/LICM/promote-capture.ll |  9 ++++-----
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 550ceca..41f5627 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1973,7 +1973,6 @@ bool llvm::promoteLoopAccessesToScalars(
 
   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
 
-  bool IsKnownThreadLocalObject = false;
   if (SafetyInfo->anyBlockMayThrow()) {
     // If a loop can throw, we have to insert a store along each unwind edge.
     // That said, we can't actually make the unwind edge explicit. Therefore,
@@ -1983,9 +1982,6 @@ bool llvm::promoteLoopAccessesToScalars(
     Value *Object = getUnderlyingObject(SomePtr);
     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
       return false;
-    // Subtlety: Alloca's aren't visible to callers, but *are* potentially
-    // visible to other threads if captured and used during their lifetimes.
-    IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
   }
 
   // Check that all accesses to pointers in the alias set use the same type.
@@ -2113,14 +2109,11 @@ bool llvm::promoteLoopAccessesToScalars(
   // stores along paths which originally didn't have them without violating the
   // memory model.
   if (!SafeToInsertStore) {
-    if (IsKnownThreadLocalObject)
-      SafeToInsertStore = true;
-    else {
-      Value *Object = getUnderlyingObject(SomePtr);
-      SafeToInsertStore =
-          (isNoAliasCall(Object) || isa<AllocaInst>(Object)) &&
-          isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
-    }
+    Value *Object = getUnderlyingObject(SomePtr);
+    SafeToInsertStore =
+        (isNoAliasCall(Object) || isa<AllocaInst>(Object) ||
+         (isa<Argument>(Object) && cast<Argument>(Object)->hasByValAttr())) &&
+        isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
   }
 
   // If we've still failed to prove we can sink the store, hoist the load
diff --git a/llvm/test/Transforms/LICM/promote-capture.ll b/llvm/test/Transforms/LICM/promote-capture.ll
index 3211d02..a409a49 100644
--- a/llvm/test/Transforms/LICM/promote-capture.ll
+++ b/llvm/test/Transforms/LICM/promote-capture.ll
@@ -156,7 +156,7 @@ exit:
   ret void
 }
 
-; FIXME: Should not get promoted, because the pointer is captured and may not
+; Should not get promoted, because the pointer is captured and may not
 ; be thread-local.
 define void @test_captured_before_loop_byval(i32* byval(i32) align 4 %count, i32 %len) {
 ; CHECK-LABEL: @test_captured_before_loop_byval(
@@ -172,6 +172,7 @@ define void @test_captured_before_loop_byval(i32* byval(i32) align 4 %count, i32
 ; CHECK-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH]]
 ; CHECK:       if:
 ; CHECK-NEXT:    [[C_INC:%.*]] = add i32 [[C_INC2]], 1
+; CHECK-NEXT:    store i32 [[C_INC]], i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ]
@@ -179,8 +180,6 @@ define void @test_captured_before_loop_byval(i32* byval(i32) align 4 %count, i32
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[C_INC1_LCSSA:%.*]] = phi i32 [ [[C_INC1]], [[LATCH]] ]
-; CHECK-NEXT:    store i32 [[C_INC1_LCSSA]], i32* [[COUNT]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -212,7 +211,6 @@ define void @test_captured_after_loop_byval(i32* byval(i32) align 4 %count, i32
 ; CHECK-LABEL: @test_captured_after_loop_byval(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 0, i32* [[COUNT:%.*]], align 4
-; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
 ; CHECK-NEXT:    [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
@@ -231,11 +229,11 @@ define void @test_captured_after_loop_byval(i32* byval(i32) align 4 %count, i32
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[C_INC1_LCSSA:%.*]] = phi i32 [ [[C_INC1]], [[LATCH]] ]
 ; CHECK-NEXT:    store i32 [[C_INC1_LCSSA]], i32* [[COUNT]], align 4
+; CHECK-NEXT:    call void @capture(i32* [[COUNT]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
   store i32 0, i32* %count
-  call void @capture(i32* %count)
   br label %loop
 
 loop:
@@ -255,5 +253,6 @@ latch:
   br i1 %cmp, label %exit, label %loop
 
 exit:
+  call void @capture(i32* %count)
   ret void
 }
-- 
2.7.4