From f4bf9119a11ff30228bee3ff3977d1810900f379 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Jan 2015 06:07:27 +0000 Subject: [PATCH] [x86] Change AVX512 intrinsics to take a 8-bit immediate for the comparision kind instead of a 32-bit immediate. This better aligns with the emitted instruction. It also matches SSE and AVX1 equivalents. Also add auto upgrade support. llvm-svn: 226430 --- llvm/include/llvm/IR/IntrinsicsX86.td | 4 ++-- llvm/lib/IR/AutoUpgrade.cpp | 37 +++++++++++++++++++++++++++++++++++ llvm/lib/Target/X86/X86InstrAVX512.td | 8 ++++---- llvm/lib/Target/X86/X86InstrInfo.td | 4 ---- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 427cb50..671dde8 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -3882,10 +3882,10 @@ let TargetPrefix = "x86" in { // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, - Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, + Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, + Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e3544df..e734c7f 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -60,6 +60,21 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, return true; } +// Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit +// immediates have changed their type from i32 to i8. +static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID, + Function *&NewFn) { + // Check that the last argument is an i32. + Type *LastArgType = F->getFunctionType()->getParamType(2); + if (!LastArgType->isIntegerTy(32)) + return false; + + // Move this function aside and map down. + F->setName(F->getName() + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + return true; +} + static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); @@ -206,6 +221,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, NewFn); + if (Name == "x86.avx512.mask.cmp.ps.512") + return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512, + NewFn); + if (Name == "x86.avx512.mask.cmp.pd.512") + return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512, + NewFn); + // frcz.ss/sd may need to have an argument dropped if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { F->setName(Name + ".old"); @@ -547,6 +569,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; } + case Intrinsic::x86_avx512_mask_cmp_ps_512: + case Intrinsic::x86_avx512_mask_cmp_pd_512: { + // Need to truncate the last argument from i32 to i8 -- this argument models + // an inherently 8-bit immediate operand to these x86 instructions. + SmallVector Args(CI->arg_operands().begin(), + CI->arg_operands().end()); + + // Replace the last argument with a trunc. + Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc"); + + CallInst *NewCall = Builder.CreateCall(NewFn, Args); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + return; + } } } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ff3587c..573544c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1577,25 +1577,25 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), imm:$cc), VK8)>; def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1), + (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR16)>; def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1), + (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), FROUND_NO_EXC)), (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR8)>; def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), - (v16f32 VR512:$src2), i32immZExt5:$cc, (i16 -1), + (v16f32 VR512:$src2), i8immZExt5:$cc, (i16 -1), FROUND_CURRENT)), (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR16)>; def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), - (v8f64 VR512:$src2), i32immZExt5:$cc, (i8 -1), + (v8f64 VR512:$src2), i8immZExt5:$cc, (i8 -1), FROUND_CURRENT)), (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, (I8Imm imm:$cc)), GR8)>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 53715dc..64e4636 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -555,10 +555,6 @@ def AVXCC : Operand { def i8immZExt5 : ImmLeaf= 0 && Imm < 32; }]>; -// AVX-512 uses a 32-bit immediate in their intrinsics -def i32immZExt5 : ImmLeaf= 0 && Imm < 32; -}]>; class ImmSExtAsmOperandClass : AsmOperandClass { let SuperClasses = [ImmAsmOperand]; -- 2.7.4