return true;
}
+ // The SSE2 version is too unusual (e.g., the store is non-temporal yet may
+ // be unaligned) to do anything else at this level.
+ if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
+ return false;
+
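+ // Everything below requires the mask to be a constant vector
+ // (a ConstantDataVector); bail out on any other mask.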
auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
if (!ConstMask)
return false;
return I;
break;
+ case Intrinsic::x86_sse2_maskmov_dqu:
case Intrinsic::x86_avx_maskstore_ps:
case Intrinsic::x86_avx_maskstore_pd:
case Intrinsic::x86_avx_maskstore_ps_256:
; CHECK-NEXT: ret void
}
+; The original SSE2 masked store variant.
+
+define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
+ tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
+ ret void
+
+; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
+; CHECK-NEXT: ret void
+}
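+
+; Hypothetical companion test (a sketch, not part of the original patch): with
+; a non-zero constant mask, the SSE2 variant is deliberately left alone by the
+; early bail-out above, so the call must survive instcombine.
+
+define void @mstore_v16i8_sse2_allones(<16 x i8> %d, i8* %p) {
+  tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8* %p)
+  ret void
+
+; CHECK-LABEL: @mstore_v16i8_sse2_allones(
+; CHECK-NEXT: tail call void @llvm.x86.sse2.maskmov.dqu(
+; CHECK-NEXT: ret void
+}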
+
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
+