From: Sanjay Patel
Date: Wed, 14 Aug 2019 20:21:30 +0000 (+0000)
Subject: [x86] add tests for fadd reduction; NFC
X-Git-Tag: llvmorg-11-init~11820
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a8ba919c01df9e1d9792c8de2347411f3439b45a;p=platform%2Fupstream%2Fllvm.git

[x86] add tests for fadd reduction; NFC

More coverage for D66236.

llvm-svn: 368913
---

diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index a349065..78270a5 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -1985,3 +1985,135 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize {
   %x230 = extractelement <16 x float> %x229, i32 0
   ret float %x230
 }
+
+define float @partial_reduction_fadd_v8f32(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd <8 x float> %x, %x23
+  %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd nsz reassoc <8 x float> %x0213, %x13
+  %r = extractelement <8 x float> %x0123, i32 0
+  ret float %r
+}
+
+define float @partial_reduction_fadd_v8f32_wrong_flags(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd fast <8 x float> %x, %x23
+  %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd ninf nnan <8 x float> %x0213, %x13
+  %r = extractelement <8 x float> %x0123, i32 0
+  ret float %r
+}
+
+define float @partial_reduction_fadd_v16f32(<16 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v16f32:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <16 x float> %x, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd <16 x float> %x, %x23
+  %x13 = shufflevector <16 x float> %x0213, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd reassoc nsz <16 x float> %x0213, %x13
+  %r = extractelement <16 x float> %x0123, i32 0
+  ret float %r
+}