This can give the register allocator more registers to use.
llvm-svn: 290057
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
+// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasVLX, HasDQI] in {
+ def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
+ [(set FR32X:$dst, fp32imm0)]>;
+ def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
+ [(set FR64X:$dst, fpimm0)]>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0SD:
+ return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
Alignment = 16;
break;
case X86::FsFLD0SD:
+ case X86::AVX512_FsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
+ case X86::AVX512_FsFLD0SS:
Alignment = 4;
break;
default:
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS: {
+ case X86::AVX512_FsFLD0SD:
+ case X86::FsFLD0SS:
+ case X86::AVX512_FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI.getOpcode();
- if (Opc == X86::FsFLD0SS)
+ if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoVLX_Or_NoDQI]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoVLX_Or_NoDQI]>;
}
//===----------------------------------------------------------------------===//
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix AVX512 --check-prefix AVX512-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s --check-prefix AVX512 --check-prefix AVX512-SKX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix AVX
; AVX512-LABEL: @test_fdiv
ret float %max
}
+; AVX512-SKX-LABEL: @zero_float
+; AVX512-SKX: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0x62,
+; AVX512-KNL-LABEL: @zero_float
+; AVX512-KNL: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5,
+; AVX-LABEL: @zero_float
+; AVX: vxorps %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5,
+
+define float @zero_float(float %a) {
+ %b = fadd float %a, 0.0
+ ret float %b
+}
+
+; AVX512-SKX-LABEL: @zero_double
+; AVX512-SKX: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0x62,
+; AVX512-KNL-LABEL: @zero_double
+; AVX512-KNL: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5,
+; AVX-LABEL: @zero_double
+; AVX: vxorpd %xmm{{.*}}, %xmm{{.*}}, %xmm{{.*}} ## encoding: [0xc5,
+
+define double @zero_double(double %a) {
+ %b = fadd double %a, 0.0
+ ret double %b
+}