[x86, InstCombine] fix masked load pass-through operand to be a zero vector

author Sanjay Patel <spatel@rotateright.com>
Tue, 12 Apr 2016 23:16:23 +0000 (23:16 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 12 Apr 2016 23:16:23 +0000 (23:16 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 89f164a..15d9f71 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -834,11 +834,12 @@ static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
  static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
    Value *Ptr = II.getOperand(0);
    Value *Mask = II.getOperand(1);
+  Constant *ZeroVec = Constant::getNullValue(II.getType());
  
    // Special case a zero mask since that's not a ConstantDataVector.
-  // This masked load instruction does nothing, so return an undef.
+  // This masked load instruction creates a zero vector.
    if (isa<ConstantAggregateZero>(Mask))
-    return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+    return IC.replaceInstUsesWith(II, ZeroVec);
  
    auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
    if (!ConstMask)
@@ -857,7 +858,9 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
    // on each element's most significant bit (the sign bit).
    Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
  
-  CallInst *NewMaskedLoad = IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask);
+  // The pass-through vector for an x86 masked load is a zero vector.
+  CallInst *NewMaskedLoad =
+      IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
    return IC.replaceInstUsesWith(II, NewMaskedLoad);
  }
  
diff --git a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
index 736af17..717a247 100644
--- a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
@@ -13,14 +13,14 @@ define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
  ; CHECK-NEXT:  ret <4 x float> %ld
  }
  
-; Zero mask is a nop.
+; Zero mask returns a zero vector.
  
  define <4 x float> @mload_zeros(i8* %f) {
    %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
    ret <4 x float> %ld
  
  ; CHECK-LABEL: @mload_zeros(
-; CHECK-NEXT:  ret <4 x float> undef
+; CHECK-NEXT:  ret <4 x float> zeroinitializer
  }
  
  ; Only the sign bit matters.
@@ -30,7 +30,7 @@ define <4 x float> @mload_fake_ones(i8* %f) {
    ret <4 x float> %ld
  
  ; CHECK-LABEL: @mload_fake_ones(
-; CHECK-NEXT:  ret <4 x float> undef
+; CHECK-NEXT:  ret <4 x float> zeroinitializer
  }
  
  ; All mask bits are set, so this is just a vector load.
@@ -53,7 +53,7 @@ define <4 x float> @mload_one_one(i8* %f) {
  
  ; CHECK-LABEL: @mload_one_one(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> undef)
+; CHECK-NEXT:  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> zeroinitializer)
  ; CHECK-NEXT:  ret <4 x float> %1
  }
  
@@ -65,7 +65,7 @@ define <2 x double> @mload_one_one_double(i8* %f) {
  
  ; CHECK-LABEL: @mload_one_one_double(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <2 x double>*
-; CHECK-NEXT:  %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> undef)
+; CHECK-NEXT:  %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> zeroinitializer)
  ; CHECK-NEXT:  ret <2 x double> %1
  }
  
@@ -77,7 +77,7 @@ define <8 x float> @mload_v8f32(i8* %f) {
  
  ; CHECK-LABEL: @mload_v8f32(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <8 x float>*
-; CHECK-NEXT:  %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> undef)
+; CHECK-NEXT:  %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> zeroinitializer)
  ; CHECK-NEXT:  ret <8 x float> %1
  }
  
@@ -87,7 +87,7 @@ define <4 x double> @mload_v4f64(i8* %f) {
  
  ; CHECK-LABEL: @mload_v4f64(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x double>*
-; CHECK-NEXT:  %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> undef)
+; CHECK-NEXT:  %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> zeroinitializer)
  ; CHECK-NEXT:  ret <4 x double> %1
  }
  
@@ -99,7 +99,7 @@ define <4 x i32> @mload_v4i32(i8* %f) {
  
  ; CHECK-LABEL: @mload_v4i32(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x i32>*
-; CHECK-NEXT:  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> undef)
+; CHECK-NEXT:  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
  ; CHECK-NEXT:  ret <4 x i32> %1
  }
  
@@ -109,7 +109,7 @@ define <2 x i64> @mload_v2i64(i8* %f) {
  
  ; CHECK-LABEL: @mload_v2i64(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <2 x i64>*
-; CHECK-NEXT:  %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> undef)
+; CHECK-NEXT:  %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> zeroinitializer)
  ; CHECK-NEXT:  ret <2 x i64> %1
  }
  
@@ -119,7 +119,7 @@ define <8 x i32> @mload_v8i32(i8* %f) {
  
  ; CHECK-LABEL: @mload_v8i32(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <8 x i32>*
-; CHECK-NEXT:  %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> undef)
+; CHECK-NEXT:  %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> zeroinitializer)
  ; CHECK-NEXT:  ret <8 x i32> %1
  }
  
@@ -129,7 +129,7 @@ define <4 x i64> @mload_v4i64(i8* %f) {
  
  ; CHECK-LABEL: @mload_v4i64(
  ; CHECK-NEXT:  %castvec = bitcast i8* %f to <4 x i64>*
-; CHECK-NEXT:  %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> undef)
+; CHECK-NEXT:  %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> zeroinitializer)
  ; CHECK-NEXT:  ret <4 x i64> %1
  }
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 12 Apr 2016 23:16:23 +0000 (23:16 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 12 Apr 2016 23:16:23 +0000 (23:16 +0000)
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/x86-masked-memops.ll		patch | blob | history