From 2ce017026af538867d592b64a75e182cddab1961 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 23 Apr 2019 15:25:14 +0000
Subject: [PATCH] [InstCombine] Convert a masked.load of a dereferenceable
 address to an unconditional load

If we have a masked.load from a location we know to be dereferenceable, we
can simply issue a speculative unconditional load against that address. The
key advantage is that it produces IR which is well understood by the
optimizer. The select (cnd, load, passthrough) form produced should be
pattern matchable back to hardware predication if profitable.

Differential Revision: https://reviews.llvm.org/D59703

llvm-svn: 359000
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  | 18 ++++++++++++++----
 llvm/test/Transforms/InstCombine/masked_intrinsics.ll |  5 +++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0fe52b1..51c72eb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1183,17 +1184,26 @@ static APInt possiblyDemandedEltsInMask(Value *Mask) {
 }
 
 // TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
 // * Narrow width by halfs excluding zero/undef lanes
 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
+  Value *LoadPtr = II.getArgOperand(0);
+  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
-  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
-    Value *LoadPtr = II.getArgOperand(0);
-    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
     return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                      "unmaskedload");
+
+  // If we can unconditionally load from this address, replace with a
+  // load/select idiom. TODO: use DT for context sensitive query
+  if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+                                         II.getModule()->getDataLayout(),
+                                         &II, nullptr)) {
+    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                          "unmaskedload");
+    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index d71402e..b451724 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -87,8 +87,9 @@ define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) %ptr,
 ; CHECK-LABEL: @load_speculative(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   double %pt, <2 x i1> %mask) {
   %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
-- 
2.7.4
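For illustration, the updated test shows the rewrite in isolation. With
stand-in operand names (%ptr, %mask, and %passthru here are illustrative,
not taken from the test), IR of the form

  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %passthru)

becomes, once %ptr is known dereferenceable and sufficiently aligned:

  %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 4
  %res = select <2 x i1> %mask, <2 x double> %unmaskedload, <2 x double> %passthru

The speculative load is safe because dereferenceable(16) on the test's
pointer argument guarantees that all 16 bytes of the <2 x double> are
readable regardless of the mask; the select then substitutes the passthrough
value in any lane the mask disables.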