// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- // TODO: Remove the broadcast intrinsics with no gcc builtin and autoupgrade
- def int_x86_avx512_vbroadcast_ss_512 :
- Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-
- def int_x86_avx512_vbroadcast_sd_512 :
- Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-
def int_x86_avx512_broadcastmw_512 :
GCCBuiltin<"__builtin_ia32_broadcastmw512">,
Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
+ Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
+ } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
+ Name.startswith("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
avx512vl_f64_info>, VEX_W;
-def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
- (VBROADCASTSSZm addr:$src)>;
-def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
- (VBROADCASTSDZm addr:$src)>;
-
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC> {
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
+
+; Calls llvm.x86.avx512.vbroadcast.ss.512 on the pointer argument and returns
+; the <16 x float> result; the expected codegen is a single vbroadcastss from
+; (%rdi) into zmm0 followed by retq.
+; NOTE(review): presumably this now exercises the auto-upgrade path for the
+; intrinsic rather than a direct isel pattern - confirm against AutoUpgrade.
+define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
+; CHECK-LABEL: test_x86_vbroadcast_ss_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
+
+; Calls llvm.x86.avx512.vbroadcast.sd.512 on the pointer argument and returns
+; the <8 x double> result; the expected codegen is a single vbroadcastsd from
+; (%rdi) into zmm0 followed by retq.
+; NOTE(review): presumably this now exercises the auto-upgrade path for the
+; intrinsic rather than a direct isel pattern - confirm against AutoUpgrade.
+define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
+; CHECK-LABEL: test_x86_vbroadcast_sd_512:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
-define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
-; CHECK-LABEL: test_x86_vbroadcast_ss_512:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
-; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
-
-define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
-; CHECK-LABEL: test_x86_vbroadcast_sd_512:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
-; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
-
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK: ## %bb.0: