From f7866fad5415ff8406fe7fa453fed1c4b08f3b92 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 2 Dec 2016 06:24:38 +0000
Subject: [PATCH] [AVX-512] Add masked VINSERTF/VINSERTI instructions to load folding tables.

llvm-svn: 288481
---
 llvm/lib/Target/X86/X86InstrInfo.cpp             | 26 +++++++++++++++++++++++-
 llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll | 20 ++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index eb1cb0c..0f90493 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2254,7 +2254,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
     { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
 
-    // AVX-512 masked arithmetic instructions
+    // AVX-512 masked instructions
     { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
     { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
     { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
@@ -2263,6 +2263,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
     { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
     { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+    { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
+    { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
+    { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
+    { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
+    { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
+    { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
+    { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
+    { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
     { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
     { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
     { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
@@ -2325,6 +2333,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
     { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
     { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+    { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
+    { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
+    { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
+    { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
     { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
     { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
     { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
@@ -2516,6 +2528,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
     { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
     { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+    { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
+    { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
+    { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
+    { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
+    { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
+    { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
+    { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
+    { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
     { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
     { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
     { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
@@ -2581,6 +2601,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
     { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
     { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+    { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
+    { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
+    { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
+    { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
     { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
     { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
     { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 202355d..90b0c8d 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -496,5 +496,25 @@ define <8 x double> @stack_fold_insertf64x4(<4 x double> %a0, <4 x double> %a1)
   ret <8 x double> %2
 }
 
+define <8 x double> @stack_fold_insertf64x4_mask(<8 x double> %passthru, <4 x double> %a0, <4 x double> %a1, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_insertf64x4_mask
+  ;CHECK:       vinsertf64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %passthru
+  ret <8 x double> %4
+}
+
+define <8 x double> @stack_fold_insertf64x4_maskz(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
+  ;CHECK-LABEL: stack_fold_insertf64x4_maskz
+  ;CHECK:       vinsertf64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[1-7]}}} {z} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = bitcast i8 %mask to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+  ret <8 x double> %4
+}
+
 attributes #0 = { "unsafe-fp-math"="false" }
 attributes #1 = { "unsafe-fp-math"="true" }
-- 
2.7.4
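
Editor's note (not part of the commit): the folding-table entries above also cover the 128-bit-into-256-bit forms such as VINSERTI64x2Z256rrk/rmk, but the test change only exercises the vinsertf64x4 mask and maskz cases. Below is a sketch, modeled on the two tests added in this patch, of what an analogous stack-folding test for the 256-bit masked integer variant might look like. The function name, the 16-byte reload size, and the target test file (e.g. stack-folding-int-avx512vl.ll, which requires avx512dq and avx512vl) are assumptions for illustration, not taken from the commit.

; Hypothetical sketch only: follows the pattern of stack_fold_insertf64x4_mask above.
define <4 x i64> @stack_fold_inserti64x2_mask(<4 x i64> %passthru, <2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ;CHECK-LABEL: stack_fold_inserti64x2_mask
  ;CHECK:       vinserti64x2 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[1-7]}}} {{.*#+}} 16-byte Folded Reload
  ; Clobber most vector registers so the %a1 operand must be spilled and reloaded,
  ; as the existing stack-folding tests do.
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; Concatenate the two 128-bit halves; this is what vinserti64x2 $1 computes.
  %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Use the low 4 bits of the i8 mask to merge with the passthru operand.
  %3 = bitcast i8 %mask to <8 x i1>
  %4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = select <4 x i1> %4, <4 x i64> %2, <4 x i64> %passthru
  ret <4 x i64> %5
}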