From 53ceb4805f741779ccfef9b6b128e98068a61022 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 May 2018 18:21:22 +0000 Subject: [PATCH] [X86] Remove and autoupgrade avx512.vbroadcast.ss/avx512.vbroadcast.sd intrinsics. llvm-svn: 332271 --- llvm/include/llvm/IR/IntrinsicsX86.td | 7 ------- llvm/lib/IR/AutoUpgrade.cpp | 4 +++- llvm/lib/Target/X86/X86InstrAVX512.td | 5 ----- llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 20 ++++++++++++++++++++ llvm/test/CodeGen/X86/avx512-intrinsics.ll | 20 -------------------- 5 files changed, 23 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 50127f7..45cb714 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4107,13 +4107,6 @@ def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mas // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - // TODO: Remove the broadcast intrinsics with no gcc builtin and autoupgrade - def int_x86_avx512_vbroadcast_ss_512 : - Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - - def int_x86_avx512_vbroadcast_sd_512 : - Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - def int_x86_avx512_broadcastmw_512 : GCCBuiltin<"__builtin_ia32_broadcastmw512">, Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 43ed98f..d477f46 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -293,6 +293,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.load.") || // Added in 3.9 Name == "sse42.crc32.64.8" || // Added in 3.4 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 + Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 Name.startswith("avx512.mask.valign.") || // Added in 4.0 Name.startswith("sse2.psll.dq") || // Added in 3.7 @@ -1676,7 +1677,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); Rep = Builder.CreateZExt(Rep, CI->getType(), ""); - } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) { + } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") || + Name.startswith("avx512.vbroadcast.s"))) { // Replace broadcasts with a series of insertelements. Type *VecTy = CI->getType(); Type *EltTy = VecTy->getVectorElementType(); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 48c1913..cb995af 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1231,11 +1231,6 @@ defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss", defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd", avx512vl_f64_info>, VEX_W; -def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src), - (VBROADCASTSSZm addr:$src)>; -def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src), - (VBROADCASTSDZm addr:$src)>; - multiclass avx512_int_broadcast_reg opc, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC> { diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index eed3078..798d040 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -4131,3 +4131,23 @@ define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) ret <2 x double> %res } declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone + +define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { +; CHECK-LABEL: test_x86_vbroadcast_ss_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly + +define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { +; CHECK-LABEL: test_x86_vbroadcast_sd_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index e91f732..1f9aa38 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -645,26 +645,6 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly -define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) { -; CHECK-LABEL: test_x86_vbroadcast_ss_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1] - ret <16 x float> %res -} -declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly - -define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) { -; CHECK-LABEL: test_x86_vbroadcast_sd_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1] - ret <8 x double> %res -} -declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly - define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { ; CHECK-LABEL: test_cmpps: ; CHECK: ## %bb.0: -- 2.7.4