From 2cf3769a7e7fa5aba6a43f3a91f121d6d447762b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 20 Feb 2018 18:44:21 +0000 Subject: [PATCH] [X86][3DNow] Regenerate intrinsics tests llvm-svn: 325609 --- llvm/test/CodeGen/X86/3dnow-intrinsics.ll | 719 ++++++++++++++++++++++++++++-- 1 file changed, 693 insertions(+), 26 deletions(-) diff --git a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll index 6817df4..98e21f3 100644 --- a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll +++ b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll @@ -1,7 +1,22 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefixes=CHECK,X64 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { -; CHECK: pavgusb +; X86-LABEL: test_pavgusb: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pavgusb %mm1, %mm0 +; X86-NEXT: movq %mm0, (%eax) +; X86-NEXT: retl $4 +; +; X64-LABEL: test_pavgusb: +; X64: # %bb.0: # %entry +; X64-NEXT: pavgusb %mm1, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <8 x i8> %1 = bitcast x86_mmx %b.coerce to <8 x i8> @@ -15,7 +30,32 @@ entry: declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone { -; CHECK: pf2id +; X86-LABEL: test_pf2id: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: pf2id {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pf2id: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: pf2id %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0) @@ -26,7 +66,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfacc +; X86-LABEL: test_pfacc: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfacc {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfacc: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfacc %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -38,7 +108,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfadd +; X86-LABEL: test_pfadd: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfadd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfadd: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfadd %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -50,7 +150,38 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfcmpeq +; X86-LABEL: test_pfcmpeq: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfcmpeq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfcmpeq: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfcmpeq %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -62,7 +193,38 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfcmpge +; X86-LABEL: test_pfcmpge: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfcmpge {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfcmpge: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfcmpge %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -74,7 +236,38 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfcmpgt +; X86-LABEL: test_pfcmpgt: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfcmpgt {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfcmpgt: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfcmpgt %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -86,7 +279,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfmax +; X86-LABEL: test_pfmax: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfmax {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfmax: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfmax %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -98,7 +321,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfmin +; X86-LABEL: test_pfmin: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfmin {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfmin: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfmin %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -110,7 +363,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfmul +; X86-LABEL: test_pfmul: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfmul {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfmul: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfmul %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -122,7 +405,31 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone { -; CHECK: pfrcp +; X86-LABEL: test_pfrcp: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: pfrcp {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfrcp: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: pfrcp %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0) @@ -133,7 +440,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfrcpit1 +; X86-LABEL: test_pfrcpit1: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfrcpit1 {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfrcpit1: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfrcpit1 %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -145,7 +482,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfrcpit2 +; X86-LABEL: test_pfrcpit2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfrcpit2 {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfrcpit2: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfrcpit2 %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -157,7 +524,31 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone { -; CHECK: pfrsqrt +; X86-LABEL: test_pfrsqrt: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: pfrsqrt {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfrsqrt: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: pfrsqrt %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0) @@ -168,7 +559,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfrsqit1 +; X86-LABEL: test_pfrsqit1: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfrsqit1 {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfrsqit1: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfrsqit1 %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -180,7 +601,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfsub +; X86-LABEL: test_pfsub: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfsub {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfsub: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfsub %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -192,7 +643,37 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfsubr +; X86-LABEL: test_pfsubr: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfsubr {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfsubr: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfsubr %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -204,7 +685,26 @@ entry: declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone { -; CHECK: pi2fd +; X86-LABEL: test_pi2fd: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: pi2fd %mm0, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pi2fd: +; X64: # %bb.0: # %entry +; X64-NEXT: pi2fd %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -216,7 +716,20 @@ entry: declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { -; CHECK: pmulhrw +; X86-LABEL: test_pmulhrw: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pmulhrw %mm1, %mm0 +; X86-NEXT: movq %mm0, (%eax) +; X86-NEXT: retl $4 +; +; X64-LABEL: test_pmulhrw: +; X64: # %bb.0: # %entry +; X64-NEXT: pmulhrw %mm1, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <4 x i16> %1 = bitcast x86_mmx %b.coerce to <4 x i16> @@ -230,7 +743,32 @@ entry: declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone { -; CHECK: pf2iw +; X86-LABEL: test_pf2iw: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: pf2iw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pf2iw: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: pf2iw %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0) @@ -241,7 +779,37 @@ entry: declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfnacc +; X86-LABEL: test_pfnacc: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfnacc {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfnacc: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfnacc %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -253,7 +821,37 @@ entry: declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; CHECK: pfpnacc +; X86-LABEL: test_pfpnacc: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 20(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 16(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pfpnacc {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pfpnacc: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm1, %mm0 +; X64-NEXT: movdq2q %xmm0, %mm1 +; X64-NEXT: pfpnacc %mm0, %mm1 +; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = bitcast <2 x float> %b to x86_mmx @@ -265,7 +863,26 @@ entry: declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone { -; CHECK: pi2fw +; X86-LABEL: test_pi2fw: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: pi2fw %mm0, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pi2fw: +; X64: # %bb.0: # %entry +; X64-NEXT: pi2fw %mm0, %mm0 +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast x86_mmx %a.coerce to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -277,7 +894,31 @@ entry: declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone { -; CHECK: pswapd {{.*#+}} mm0 = mem[1,0] +; X86-LABEL: test_pswapdsf: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: flds 12(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: flds 8(%ebp) +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: pswapd {{[0-9]+}}(%esp), %mm0 # mm0 = mem[1,0] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pswapdsf: +; X64: # %bb.0: # %entry +; X64-NEXT: movdq2q %xmm0, %mm0 +; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <2 x float> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) @@ -286,7 +927,33 @@ entry: } define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { -; CHECK: pswapd {{.*#+}} mm0 = mem[1,0] +; X86-LABEL: test_pswapdsi: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: pswapd {{[0-9]+}}(%esp), %mm0 # mm0 = mem[1,0] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test_pswapdsi: +; X64: # %bb.0: # %entry +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: pswapd -{{[0-9]+}}(%rsp), %mm0 # mm0 = mem[1,0] +; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; X64-NEXT: retq entry: %0 = bitcast <2 x i32> %a to x86_mmx %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) -- 2.7.4