From 055d2095898dfbb58b71322c02fbba7e71e8f76a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Sep 2020 14:05:41 -0500 Subject: [PATCH] Handle masked loads and stores in MemoryLocation/Dependence Differential Revision: https://reviews.llvm.org/D87061 --- llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 23 ++++++++++++++++++++++- llvm/lib/Analysis/MemoryLocation.cpp | 15 +++++++++++++++ llvm/test/Transforms/GVN/masked-load-store.ll | 6 ++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2428d57..a19c1d7 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; + case Intrinsic::masked_load: + Loc = MemoryLocation::getForArgument(II, 0, TLI); + return ModRefInfo::Ref; + case Intrinsic::masked_store: + Loc = MemoryLocation::getForArgument(II, 1, TLI); + return ModRefInfo::Mod; default: break; } @@ -442,7 +448,9 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (IntrinsicInst *II = dyn_cast(Inst)) { // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + Intrinsic::ID ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::lifetime_start: // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point (the right approach is to use @@ -450,6 +458,19 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc)) return MemDepResult::getDef(II); continue; + case Intrinsic::masked_load: + case Intrinsic::masked_store: { + MemoryLocation Loc; + /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); + AliasResult R = BatchAA.alias(Loc, MemLoc); + if (R == NoAlias) + continue; + if (R == MustAlias) + return MemDepResult::getDef(II); + if (ID == Intrinsic::masked_load) + continue; + return MemDepResult::getClobber(II); + } } } diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 9694036..fcea03a 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -176,6 +176,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, cast(II->getArgOperand(0))->getZExtValue()), AATags); + case Intrinsic::masked_load: + assert(ArgIdx == 0 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound(DL.getTypeStoreSize(II->getType())), + AATags); + + case Intrinsic::masked_store: + assert(ArgIdx == 1 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound( + DL.getTypeStoreSize(II->getArgOperand(0)->getType())), + AATags); + case Intrinsic::invariant_end: // The first argument to an invariant.end is a "descriptor" type (e.g. a // pointer to a empty struct) which is never actually dereferenced. diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll index 8119d77..0b71a10 100644 --- a/llvm/test/Transforms/GVN/masked-load-store.ll +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -gvn -S < %s | FileCheck %s +; Check that in both cases the second load is recognized as redundant +; and is removed. + define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-LABEL: @f0( ; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] @@ -21,8 +24,7 @@ define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 ; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) -; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) -; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]] +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]] ; CHECK-NEXT: ret <128 x i8> [[V4]] ; %v0 = icmp eq <128 x i8> %a1, %a2 -- 2.7.4