[InstCombine] ease alignment restriction for converting masked load to normal load
authorSanjay Patel <spatel@rotateright.com>
Tue, 29 Sep 2020 19:25:04 +0000 (15:25 -0400)
committerSanjay Patel <spatel@rotateright.com>
Tue, 29 Sep 2020 19:26:22 +0000 (15:26 -0400)
I think we initially made this fold conservative to be safer, but we do not
need the alignment attribute/metadata limitation because the masked load
intrinsic itself specifies the alignment. A normal vector load is better for
IR transforms and should be no worse in codegen than the masked alternative.
If it is worse for some target, the backend can reverse this transform.

Differential Revision: https://reviews.llvm.org/D88505

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/masked_intrinsics.ll

index 90571bd..465191b 100644 (file)
@@ -289,9 +289,8 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
 
   // If we can unconditionally load from this address, replace with a
   // load/select idiom. TODO: use DT for context sensitive query
-  if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
-                                         II.getModule()->getDataLayout(), &II,
-                                         nullptr)) {
+  if (isDereferenceablePointer(LoadPtr, II.getType(),
+                               II.getModule()->getDataLayout(), &II, nullptr)) {
     Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                          "unmaskedload");
     return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
index a16f368..684e008 100644 (file)
@@ -100,8 +100,9 @@ define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable
 ; CHECK-LABEL: @load_speculative_less_aligned(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
   %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1