From 9226ba6b376ea2a221e97c2f674841a496869f4d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 6 Jun 2019 05:41:27 +0000
Subject: [PATCH] [X86] Don't turn avx masked.load with constant mask into
 masked.load+vselect when passthru value is all zeroes.

This is intended to enable the use of an immediate blend or more
optimal instruction. But if the passthru is zero we don't need any
additional instructions.

llvm-svn: 362675
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +++
 llvm/test/CodeGen/X86/masked_load.ll    | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7f5ebf..250af7a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39120,6 +39120,9 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
   if (ML->getPassThru().isUndef())
     return SDValue();
 
+  if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
+    return SDValue();
+
   // The new masked load has an undef pass-through operand. The select uses the
   // original pass-through operand.
   SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index 738fb31..b5efae10 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -6852,8 +6852,6 @@ define <8 x float> @mload_constmask_v8f32_zero(<8 x float>* %addr, <8 x float> %
 ; AVX1OR2:       ## %bb.0:
 ; AVX1OR2-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967295,4294967295,0,0,0,0,0]
 ; AVX1OR2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
-; AVX1OR2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
 ; AVX1OR2-NEXT:    retq
 ;
 ; AVX512F-LABEL: mload_constmask_v8f32_zero:
-- 
2.7.4
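
For illustration, a minimal IR function exercising the new early-out (a
constant-mask @llvm.masked.load whose passthru is all zeroes) could look
like the sketch below. The function name is hypothetical and not part of
the patch; the intrinsic and its typed-pointer signature match the ones
used in masked_load.ll at this revision:

; With a zeroinitializer passthru, vmaskmovps alone is enough on AVX1/AVX2:
; the instruction already zeroes the masked-off lanes, so emitting a
; vselect on top of it (the vxorps + vblendps removed from the test above)
; would be wasted work.
define <8 x float> @mload_constmask_zero_passthru(<8 x float>* %addr) {
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> zeroinitializer)
  ret <8 x float> %res
}

declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)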