From 142897dd7d587eb7ab6827a626675438527ef75e Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 10 Jul 2022 13:46:42 -0500 Subject: [PATCH] [Attributor] Only non-exact accesses require a uniform bit-pattern (=0) If we only have exact accesses we should never require the bit-pattern to be uniform (in this case 0). Only a non-exact access should force us to require only 0 values. --- llvm/lib/Transforms/IPO/Attributor.cpp | 8 ++++---- llvm/test/Transforms/Attributor/value-simplify-gpu.ll | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index aaaadf9..952c188 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -378,13 +378,13 @@ static bool getPotentialCopiesOfMemoryValue( bool NullOnly = true; bool NullRequired = false; - auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V) { + auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V, bool IsExact) { if (!V || *V == nullptr) NullOnly = false; else if (isa<UndefValue>(*V)) /* No op */; else if (isa<Constant>(*V) && cast<Constant>(*V)->isNullValue()) - NullRequired = true; + NullRequired = !IsExact; else NullOnly = false; }; @@ -395,7 +395,7 @@ static bool getPotentialCopiesOfMemoryValue( LLVM_DEBUG(dbgs() << "Failed to get initial value: " << *Obj << "\n"); return false; } - CheckForNullOnlyAndUndef(InitialValue); + CheckForNullOnlyAndUndef(InitialValue, /* IsExact */ true); NewCopies.push_back(InitialValue); NewCopyOrigins.push_back(nullptr); } @@ -405,7 +405,7 @@ static bool getPotentialCopiesOfMemoryValue( return true; if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - CheckForNullOnlyAndUndef(Acc.getContent()); + CheckForNullOnlyAndUndef(Acc.getContent(), IsExact); if (OnlyExact && !IsExact && !NullOnly && !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll 
b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index 0ef99fc..fa19fe6 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -288,7 +288,7 @@ define internal void @level2a(i32* %addr) { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nosync nounwind @@ -298,7 +298,7 @@ define internal void @level2a(i32* %addr) { ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[QQQQ2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[QQQQ2]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 [[QQQQ2]]) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -316,7 +316,7 @@ define internal void @level2b(i32* %addr) { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; 
IS__CGSCC____: Function Attrs: nosync nounwind @@ -326,7 +326,7 @@ define internal void @level2b(i32* %addr) { ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 [[TMP2]]) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: -- 2.7.4