From 4891c724aabac8c78d95391db76d04db4ff8d9d5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 27 Aug 2016 05:22:08 +0000
Subject: [PATCH] [AVX-512] Add load folding for EVEX vcmpps/pd/ss/sd.

llvm-svn: 279912
---
 llvm/lib/Target/X86/X86InstrInfo.cpp               |  8 +++++
 llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll   | 18 +++++++++++
 llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll | 36 ++++++++++++++++++++++
 3 files changed, 62 insertions(+)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index fdd8114..a16ff3f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1700,6 +1700,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VDIVSSZrr_Int,     X86::VDIVSSZrm_Int,       0 },
     { X86::VDIVSDZrr,         X86::VDIVSDZrm,           0 },
     { X86::VDIVSDZrr_Int,     X86::VDIVSDZrm_Int,       0 },
+    { X86::VCMPPDZrri,        X86::VCMPPDZrmi,          0 },
+    { X86::VCMPPSZrri,        X86::VCMPPSZrmi,          0 },
+    { X86::VCMPSDZrr,         X86::VCMPSDZrm,           0 },
+    { X86::VCMPSSZrr,         X86::VCMPSSZrm,           0 },
     { X86::VCMPSDZrr_Int,     X86::VCMPSDZrm_Int,       0 },
     { X86::VCMPSSZrr_Int,     X86::VCMPSSZrm_Int,       0 },
     { X86::VANDPDZrr,         X86::VANDPDZrm,           0 },
@@ -1828,6 +1832,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMINPSZ256rr,      X86::VMINPSZ256rm,        0 },
     { X86::VMINCPSZ128rr,     X86::VMINCPSZ128rm,       0 },
     { X86::VMINCPSZ256rr,     X86::VMINCPSZ256rm,       0 },
+    { X86::VCMPPDZ128rri,     X86::VCMPPDZ128rmi,       0 },
+    { X86::VCMPPDZ256rri,     X86::VCMPPDZ256rmi,       0 },
+    { X86::VCMPPSZ128rri,     X86::VCMPPSZ128rmi,       0 },
+    { X86::VCMPPSZ256rri,     X86::VCMPPSZ256rmi,       0 },
 
     // AES foldable instructions
     { X86::AESDECLASTrr,      X86::AESDECLASTrm,        TB_ALIGN_16 },
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index a192b5b..86fd2dd 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -116,6 +116,24 @@ define <16 x float> @stack_fold_andps_zmm(<16 x float> %a0, <16 x float> %a1) {
   ret <16 x float> %6
 }
 
+define i8 @stack_fold_cmppd(<8 x double> %a0, <8 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_cmppd
+  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a0, <8 x double> %a1, i32 0, i8 -1, i32 4)
+  ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
+
+define i16 @stack_fold_cmpps(<16 x float> %a0, <16 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cmpps
+  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a0, <16 x float> %a1, i32 0, i16 -1, i32 4)
+  ret i16 %res
+}
+declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
+
 define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
   ;CHECK-LABEL: stack_fold_divsd_int
   ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
index 8273879..f551e33 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -148,6 +148,42 @@ define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
   ret <8 x float> %6
 }
 
+define i8 @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_cmppd
+  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a0, <2 x double> %a1, i32 0, i8 -1)
+  ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8)
+
+define i8 @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
+  ;CHECK-LABEL: stack_fold_cmppd_ymm
+  ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0, i8 -1)
+  ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8)
+
+define i8 @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cmpps
+  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a0, <4 x float> %a1, i32 0, i8 -1)
+  ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8)
+
+define i8 @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cmpps_ymm
+  ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 0, i8 -1)
+  ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8)
+
 define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 {
   ;CHECK-LABEL: stack_fold_maxpd
   ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
--
2.7.4