From 15694fd6ad955c6a16b446a6324364111a49ae8b Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Tue, 3 Nov 2020 11:06:41 -0500 Subject: [PATCH] [CaptureTracking] Avoid overly restrictive dominates check CapturesBefore tracker has an overly restrictive dominates check when the `BeforeHere` and the capture point are in different basic blocks. All we need to check is that there is no path from the capture point to `BeforeHere` (which is less strict than the dominates check). See added testcase in one of the users of CapturesBefore. Reviewed-By: jdoerfert Differential Revision: https://reviews.llvm.org/D90688 --- llvm/lib/Analysis/CaptureTracking.cpp | 8 ++++---- llvm/test/Transforms/MemCpyOpt/callslot.ll | 33 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 06a3b81e0..15e022c 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -133,10 +133,10 @@ namespace { return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT); } - // If the value is defined in the same basic block as use and BeforeHere, - // there is no need to explore the use if BeforeHere dominates use. - // Check whether there is a path from I to BeforeHere. - if (BeforeHere != I && DT->dominates(BeforeHere, I) && + // If the value is defined in a different basic block than BeforeHere, + // there is no need to explore the use if there is no path from I to + // BeforeHere. + if (BeforeHere != I && !isPotentiallyReachable(I, BeforeHere, nullptr, DT)) return true; diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index f85a676..c9d2364 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -186,6 +186,39 @@ define void @capture_before_call_argmemonly() { ret void } +; There is no path from the capture back to the memcpy. 
+; So we can perform the call slot optimization. +define void @capture_nopath_call_argmemonly(i1 %cond) { +; CHECK-LABEL: @capture_nopath_call_argmemonly( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: br i1 [[COND:%.*]], label [[CAPTURES:%.*]], label [[NOCAPTURES:%.*]] +; CHECK: captures: +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) +; CHECK-NEXT: ret void +; CHECK: nocaptures: +; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR5:#.*]] +; CHECK-NEXT: ret void +; + %dest = alloca [16 x i8] + %src = alloca [16 x i8] + %dest.i8 = bitcast [16 x i8]* %dest to i8* + %src.i8 = bitcast [16 x i8]* %src to i8* + br i1 %cond, label %captures, label %nocaptures + +captures: + call void @accept_ptr(i8* %dest.i8) ; capture + ret void + +nocaptures: + call void @accept_ptr(i8* %src.i8) argmemonly nounwind + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest.i8, i8* %src.i8, i64 16, i1 false) + ret void +} + define void @capture_before_call_argmemonly_nounwind() { ; CHECK-LABEL: @capture_before_call_argmemonly_nounwind( ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 -- 2.7.4