From 49841c38121941982a96c0f635cdc3b71d4f1800 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 7 Aug 2016 05:39:51 +0000 Subject: [PATCH] [X86] Add commutable floating point max/min instructions to the load folding tables. llvm-svn: 277949 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 20 ++++ llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll | 133 +++++++++++++++++++++--- llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll | 95 ++++++++++++++--- 3 files changed, 221 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 008d27e..a3c11cb 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1004,16 +1004,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, + { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, + { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 }, { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXCSDrr, X86::MAXCSDrm, 0 }, { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXCSSrr, X86::MAXCSSrm, 0 }, { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, + { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 }, { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, + { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 }, { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINCSDrr, X86::MINCSDrm, 0 }, { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINCSSrr, X86::MINCSSrm, 0 }, { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE }, { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, @@ -1305,16 +1313,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, + { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 }, { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, + { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 }, { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, + { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 }, { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, + { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 }, { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, { X86::VMINPDrr, X86::VMINPDrm, 0 }, + { X86::VMINCPDrr, X86::VMINCPDrm, 0 }, { X86::VMINPSrr, X86::VMINPSrm, 0 }, + { X86::VMINCPSrr, X86::VMINCPSrm, 0 }, { X86::VMINSDrr, X86::VMINSDrm, 0 }, + { X86::VMINCSDrr, X86::VMINCSDrm, 0 }, { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, + { X86::VMINCSSrr, X86::VMINCSSrm, 0 }, { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE }, { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, @@ -1459,9 +1475,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, + { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 }, { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, + { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 }, { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, + { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 }, { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, + { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 }, { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, { X86::VORPDYrr, X86::VORPDYrm, 0 }, diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll index 5dfdf4b..c788b4f 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll @@ -989,7 +989,7 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone -define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_maxpd ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -998,7 +998,15 @@ define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone -define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) { +define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxpd_commutable + ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) + ret <2 x double> %2 +} + +define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_maxpd_ymm ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1007,7 +1015,15 @@ define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) { } declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone -define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { +define <4 x double> @stack_fold_maxpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxpd_ymm_commutable + ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) + ret <4 x double> %2 +} + +define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_maxps ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1016,7 +1032,15 @@ define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone -define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) { +define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxps_commutable + ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) + ret <4 x float> %2 +} + +define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_maxps_ymm ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1025,7 +1049,15 @@ define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) { } declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone -define double @stack_fold_maxsd(double %a0, double %a1) { +define <8 x float> @stack_fold_maxps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxps_ymm_commutable + ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) + ret <8 x float> %2 +} + +define double @stack_fold_maxsd(double %a0, double %a1) #0 { ;CHECK-LABEL: stack_fold_maxsd ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1034,7 +1066,16 @@ define double @stack_fold_maxsd(double %a0, double %a1) { ret double %3 } -define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { +define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 { + ;CHECK-LABEL: stack_fold_maxsd_commutable + ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp ogt double %a0, %a1 + %3 = select i1 %2, double %a0, double %a1 + ret double %3 +} + +define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_maxsd_int ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1043,7 +1084,7 @@ define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone -define float @stack_fold_maxss(float %a0, float %a1) { +define float @stack_fold_maxss(float %a0, float %a1) #0 { ;CHECK-LABEL: stack_fold_maxss ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1052,7 +1093,16 @@ define float @stack_fold_maxss(float %a0, float %a1) { ret float %3 } -define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { +define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 { + ;CHECK-LABEL: stack_fold_maxss_commutable + ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp ogt float %a0, %a1 + %3 = select i1 %2, float %a0, float %a1 + ret float %3 +} + +define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_maxss_int ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1061,7 +1111,7 @@ define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone -define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_minpd ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1070,7 +1120,15 @@ define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone -define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) { +define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_minpd_commutable + ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) + ret <2 x double> %2 +} + +define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_minpd_ymm ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1079,7 +1137,15 @@ define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) { } declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone -define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { +define <4 x double> @stack_fold_minpd_ymm_commutable(<4 x double> %a0, <4 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_minpd_ymm_commutable + ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) + ret <4 x double> %2 +} + +define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_minps ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1088,7 +1154,15 @@ define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone -define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) { +define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_minps_commutable + ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) + ret <4 x float> %2 +} + +define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_minps_ymm ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1097,7 +1171,15 @@ define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) { } declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone -define double @stack_fold_minsd(double %a0, double %a1) { +define <8 x float> @stack_fold_minps_ymm_commutable(<8 x float> %a0, <8 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_minps_ymm_commutable + ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) + ret <8 x float> %2 +} + +define double @stack_fold_minsd(double %a0, double %a1) #0 { ;CHECK-LABEL: stack_fold_minsd ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1106,6 +1188,15 @@ define double @stack_fold_minsd(double %a0, double %a1) { ret double %3 } +define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 { + ;CHECK-LABEL: stack_fold_minsd_commutable + ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp olt double %a0, %a1 + %3 = select i1 %2, double %a0, double %a1 + ret double %3 +} + define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { ;CHECK-LABEL: stack_fold_minsd_int ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload @@ -1115,7 +1206,7 @@ define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone -define float @stack_fold_minss(float %a0, float %a1) { +define float @stack_fold_minss(float %a0, float %a1) #0 { ;CHECK-LABEL: stack_fold_minss ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1124,7 +1215,16 @@ define float @stack_fold_minss(float %a0, float %a1) { ret float %3 } -define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { +define float @stack_fold_minss_commutable(float %a0, float %a1) #1 { + ;CHECK-LABEL: stack_fold_minss_commutable + ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp olt float %a0, %a1 + %3 = select i1 %2, float %a0, float %a1 + ret float %3 +} + +define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_minss_int ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1880,3 +1980,6 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) { %6 = fadd <8 x float> %5, ret <8 x float> %6 } + +attributes #0 = { "unsafe-fp-math"="false" } +attributes #1 = { "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll index 4c67535..be0c5b7 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll @@ -665,7 +665,7 @@ define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone -define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_maxpd ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -674,7 +674,15 @@ define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone -define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { +define <2 x double> @stack_fold_maxpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxpd_commutable + ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) + ret <2 x double> %2 +} + +define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_maxps ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -683,7 +691,15 @@ define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone -define double @stack_fold_maxsd(double %a0, double %a1) { +define <4 x float> @stack_fold_maxps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_maxps_commutable + ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) + ret <4 x float> %2 +} + +define double @stack_fold_maxsd(double %a0, double %a1) #0 { ;CHECK-LABEL: stack_fold_maxsd ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -692,7 +708,16 @@ define double @stack_fold_maxsd(double %a0, double %a1) { ret double %3 } -define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { +define double @stack_fold_maxsd_commutable(double %a0, double %a1) #1 { + ;CHECK-LABEL: stack_fold_maxsd_commutable + ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp ogt double %a0, %a1 + %3 = select i1 %2, double %a0, double %a1 + ret double %3 +} + +define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_maxsd_int ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -701,7 +726,7 @@ define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone -define float @stack_fold_maxss(float %a0, float %a1) { +define float @stack_fold_maxss(float %a0, float %a1) #0 { ;CHECK-LABEL: stack_fold_maxss ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -710,7 +735,16 @@ define float @stack_fold_maxss(float %a0, float %a1) { ret float %3 } -define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { +define float @stack_fold_maxss_commutable(float %a0, float %a1) #1 { + ;CHECK-LABEL: stack_fold_maxss_commutable + ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp ogt float %a0, %a1 + %3 = select i1 %2, float %a0, float %a1 + ret float %3 +} + +define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_maxss_int ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -719,7 +753,7 @@ define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone -define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_minpd ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -728,7 +762,15 @@ define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone -define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { +define <2 x double> @stack_fold_minpd_commutable(<2 x double> %a0, <2 x double> %a1) #1 { + ;CHECK-LABEL: stack_fold_minpd_commutable + ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) + ret <2 x double> %2 +} + +define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_minps ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -737,7 +779,15 @@ define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone -define double @stack_fold_minsd(double %a0, double %a1) { +define <4 x float> @stack_fold_minps_commutable(<4 x float> %a0, <4 x float> %a1) #1 { + ;CHECK-LABEL: stack_fold_minps_commutable + ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) + ret <4 x float> %2 +} + +define double @stack_fold_minsd(double %a0, double %a1) #0 { ;CHECK-LABEL: stack_fold_minsd ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -746,7 +796,16 @@ define double @stack_fold_minsd(double %a0, double %a1) { ret double %3 } -define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { +define double @stack_fold_minsd_commutable(double %a0, double %a1) #1 { + ;CHECK-LABEL: stack_fold_minsd_commutable + ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp olt double %a0, %a1 + %3 = select i1 %2, double %a0, double %a1 + ret double %3 +} + +define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) #0 { ;CHECK-LABEL: stack_fold_minsd_int ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -755,7 +814,7 @@ define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { } declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone -define float @stack_fold_minss(float %a0, float %a1) { +define float @stack_fold_minss(float %a0, float %a1) #0 { ;CHECK-LABEL: stack_fold_minss ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -764,7 +823,16 @@ define float @stack_fold_minss(float %a0, float %a1) { ret float %3 } -define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { +define float @stack_fold_minss_commutable(float %a0, float %a1) #1 { + ;CHECK-LABEL: stack_fold_minss_commutable + ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = fcmp olt float %a0, %a1 + %3 = select i1 %2, float %a0, float %a1 + ret float %3 +} + +define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) #0 { ;CHECK-LABEL: stack_fold_minss_int ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -1157,3 +1225,6 @@ define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { %6 = fadd <4 x float> %5, ret <4 x float> %6 } + +attributes #0 = { "unsafe-fp-math"="false" } +attributes #1 = { "unsafe-fp-math"="true" } -- 2.7.4