From 938e7ab9e1868f56a46d4b0e04dfdd94a50ad7c2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 7 Aug 2016 17:14:05 +0000
Subject: [PATCH] [AVX-512] Add EVEX encoded floating point MAX/MIN
 instructions to the load folding tables.

llvm-svn: 277960
---
 llvm/lib/Target/X86/X86InstrInfo.cpp               | 38 +++++++++++-
 llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 71 ++++++++++++++++++++++
 2 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a3c11cb..cabeb5e 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1720,10 +1720,26 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 },
     { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
     { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 },
-    { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
-    { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
-    { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
     { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+    { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
+    { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+    { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
+    { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
+    { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
+    { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, 0 },
+    { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
+    { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
+    { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, 0 },
+    { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+    { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
+    { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+    { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
+    { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
+    { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
+    { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, 0 },
+    { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
+    { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
+    { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, 0 },
     { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
     { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
     { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
@@ -1798,6 +1814,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
     { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
     { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
+    { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
+    { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
+    { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
+    { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
+    { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
+    { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
+    { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
+    { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
+    { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
+    { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
+    { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
+    { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
+    { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
+    { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
+    { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
+    { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
 
     // AES foldable instructions
     { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
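Each row above pairs a register-register opcode with its load-folding twin: when a source operand of VMAXPSZrr (and friends) lives in a stack slot, the spiller and peephole optimizer can emit the VMAXPSZrm form with a memory operand instead of a separate reload. As a rough illustration of how such a table is consumed, here is a minimal C++ sketch under assumed names (FoldTable and foldedOpcode are hypothetical; the real X86InstrInfo keeps these triples in static arrays and registers them in a lookup map):

  #include <cstdint>
  #include <map>

  // One { RegOp, MemOp, Flags } triple, mirroring the initializers above.
  struct X86MemoryFoldTableEntry {
    uint16_t RegOp;  // register form, e.g. the opcode value of VMAXPSZrr
    uint16_t MemOp;  // load-folded form, e.g. the opcode value of VMAXPSZrm
    uint16_t Flags;  // extra constraints such as TB_ALIGN_16; 0 means none
  };

  // Hypothetical lookup wrapper, illustrative only.
  class FoldTable {
    std::map<uint16_t, X86MemoryFoldTableEntry> RegToMem;
  public:
    void add(const X86MemoryFoldTableEntry &E) { RegToMem[E.RegOp] = E; }
    // Returns the load-folded opcode, or 0 when no memory form is registered.
    uint16_t foldedOpcode(uint16_t RegOp) const {
      auto It = RegToMem.find(RegOp);
      return It == RegToMem.end() ? 0 : It->second.MemOp;
    }
  };

Without an entry, an instruction simply keeps its explicit reload; adding entries is purely an enabling change, which is why the patch is table edits plus tests.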
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 8916820..357c98a 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -226,6 +226,74 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
 }
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
+  ;CHECK-LABEL: stack_fold_maxpd
+  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 {
+  ;CHECK-LABEL: stack_fold_maxpd_commutable
+  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+  ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 {
+  ;CHECK-LABEL: stack_fold_maxpd_ymm
+  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
+  ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 {
+  ;CHECK-LABEL: stack_fold_maxpd_ymm_commutable
+  ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
+  ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 {
+  ;CHECK-LABEL: stack_fold_maxps
+  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
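All of the new tests force a fold the same way: the inline asm "nop" claims to clobber xmm2 through xmm31 plus flags and to define a value in an xmm register, leaving the register allocator no choice but to spill one of the vector arguments across the asm. If the table entries added above are present, the reload of that argument disappears into the vmaxpd/vmaxps itself, which is what the "16-byte/32-byte Folded Reload" CHECK patterns assert. Continuing the FoldTable sketch from above (tryFoldReload is a hypothetical helper, not LLVM's actual foldMemoryOperand interface):

  #include <cstdint>
  #include <optional>

  struct Instr {
    uint16_t Opcode;
    int FrameIndex = -1;  // stack-slot operand; -1 means pure register form
  };

  // When a use of a spilled value is reached, try rewriting the instruction
  // to its memory form so no separate reload is emitted. An empty result
  // means the spiller must insert an explicit load before the instruction.
  std::optional<Instr> tryFoldReload(const FoldTable &Table, const Instr &MI,
                                     int SpillSlot) {
    uint16_t MemOp = Table.foldedOpcode(MI.Opcode);
    if (MemOp == 0)
      return std::nullopt;            // no table entry: keep the reload
    return Instr{MemOp, SpillSlot};   // e.g. VMAXPSZrr becomes VMAXPSZrm
  }
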
+define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 {
+  ;CHECK-LABEL: stack_fold_maxps_commutable
+  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+  ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 {
+  ;CHECK-LABEL: stack_fold_maxps_ymm
+  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
+  ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 {
+  ;CHECK-LABEL: stack_fold_maxps_ymm_commutable
+  ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
+  ret <8 x float> %2
+}
+
 define double @stack_fold_mulsd(double %a0, double %a1) {
   ;CHECK-LABEL: stack_fold_mulsd
   ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
@@ -437,3 +505,6 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
   %6 = fadd <8 x float> %5,
   ret <8 x float> %6
 }
+
+attributes #0 = { "unsafe-fp-math"="false" }
+attributes #1 = { "unsafe-fp-math"="true" }
-- 
2.7.4
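The #0/#1 attribute split at the end is what exercises the two opcode families added above: with "unsafe-fp-math"="false" the backend emits the strict VMAX*/VMIN* forms, and with "unsafe-fp-math"="true" it may select the commutable VMAXC*/VMINC* forms. The distinction exists because x86 MAX/MIN are not commutative under IEEE rules: if either source is NaN, or the sources are +0.0 and -0.0, the instruction returns its second source operand. Since the AVX encoding only allows a memory operand in that second position, swapping operands to enable folding is legal only for the C variants, so both spellings need their own table entries. A small standalone sketch of the scalar selection rule (x86_max_sd is an illustrative model, not a real intrinsic):

  #include <cassert>
  #include <cmath>

  // Model of MAXSD's selection rule: the hardware effectively computes
  // (a > b) ? a : b. Comparisons involving NaN are false, and +0.0 > -0.0
  // is also false, so both cases fall through to the SECOND operand.
  double x86_max_sd(double a, double b) { return a > b ? a : b; }

  int main() {
    double qnan = std::nan("");
    assert(std::isnan(x86_max_sd(1.0, qnan)));     // NaN in b: returns b (NaN)
    assert(x86_max_sd(qnan, 1.0) == 1.0);          // NaN in a: returns b (1.0)
    assert(std::signbit(x86_max_sd(0.0, -0.0)));   // equal zeros: returns b (-0.0)
    assert(!std::signbit(x86_max_sd(-0.0, 0.0)));  // swapped: returns b (+0.0)
    return 0;
  }

Because x86_max_sd(a, b) and x86_max_sd(b, a) can differ, only the C ("commutable") entries give the folder freedom to reorder operands; the strict entries still allow folding when the spilled value already sits in the memory-operand slot, which is what the #0 tests verify.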