From: Ayman Musa Date: Sun, 13 Nov 2016 14:51:25 +0000 (+0000) Subject: [X86][AVX512] Removing llvm x86 intrinsics for _mm_mask_move_{ss|sd} intrinsics. X-Git-Tag: llvmorg-4.0.0-rc1~4737 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c09b3769aed7adcf7277a94aa8deecaf851a701e;p=platform%2Fupstream%2Fllvm.git [X86][AVX512] Removing llvm x86 intrinsics for _mm_mask_move_{ss|sd} intrinsics. Differential Revision: https://reviews.llvm.org/D26128 llvm-svn: 286761 --- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index d52f8dc..1c655e5 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -1668,20 +1668,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrReadMem, IntrArgMemOnly]>; } -// Conditional move ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_move_ss : - GCCBuiltin<"__builtin_ia32_movss_mask">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_move_sd : - GCCBuiltin<"__builtin_ia32_movsd_mask">, - Intrinsic<[llvm_v2f64_ty], - [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], - [IntrNoMem]>; -} - // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 016bf87..becff7c 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -365,6 +365,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("avx2.pblendd.") || Name.startswith("avx.vbroadcastf128") || Name == "avx2.vbroadcasti128" || + Name.startswith("avx512.mask.move.s") || Name == "xop.vpcmov" || (Name.startswith("xop.vpcom") && F->arg_size() == 2))) { NewFn = nullptr; @@ -679,6 +680,20 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, std::max(NumElts, 8U))); } +static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) { + Value* A = CI.getArgOperand(0); + Value* B = CI.getArgOperand(1); + Value* Src = CI.getArgOperand(2); + Value* Mask = CI.getArgOperand(3); + + Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1)); + Value* Cmp = Builder.CreateIsNotNull(AndNode); + Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0); + Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0); + Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2); + return Builder.CreateInsertElement(A, Select, (uint64_t)0); +} + // Replace a masked intrinsic with an older unmasked intrinsic. static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, Intrinsic::ID IID) { @@ -1341,6 +1356,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { + Rep = upgradeMaskedMove(Builder, *CI); } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) { VectorType *VecTy = cast(CI->getType()); Intrinsic::ID IID; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 4e52d7f..1e6fccf 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -794,10 +794,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMIN_RND, 0), X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK, - X86ISD::MOVSD, 0), - X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK, - X86ISD::MOVSS, 0), X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 5f02de3..3f38e68 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -1765,6 +1765,54 @@ define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> % ret <8 x i64> %res2 } +define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; CHECK-LABEL: test_mm_mask_move_ss: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq +entry: + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U) + ret <4 x float> %res +} + + +define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; CHECK-LABEL: test_mm_maskz_move_ss: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq +entry: + %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U) + ret <4 x float> %res +} + +define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; CHECK-LABEL: test_mm_mask_move_sd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} +; CHECK-NEXT: retq +entry: + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U) + ret <2 x double> %res +} + +define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; CHECK-LABEL: test_mm_maskz_move_sd: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: retq +entry: + %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U) + ret <2 x double> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8) +declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8) + declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16) define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {