From: Simon Pilgrim Date: Mon, 30 Jan 2023 14:00:45 +0000 (+0000) Subject: [X86] Add SSE2 test coverage to vector comparison all_of/any_of tests X-Git-Tag: upstream/17.0.6~19184 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=56656f8378c842ef83af2ec7606e185d6fdefaf1;p=platform%2Fupstream%2Fllvm.git [X86] Add SSE2 test coverage to vector comparison all_of/any_of tests --- diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index bbc6757..581f1d7 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_v2f64_sext: @@ -272,15 +273,33 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { } define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { -; SSE-LABEL: test_v2i64_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl $3, %ecx -; SSE-NEXT: sete %al -; SSE-NEXT: negq %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v2i64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movmskpd %xmm1, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: cmpl $3, %ecx +; SSE2-NEXT: sete %al +; SSE2-NEXT: negq %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v2i64_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: cmpl $3, %ecx +; SSE42-NEXT: sete %al +; SSE42-NEXT: negq %rax +; SSE42-NEXT: retq ; ; AVX-LABEL: test_v2i64_sext: ; AVX: # %bb.0: @@ -310,17 +329,44 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { } define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { -; SSE-LABEL: test_v4i64_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm3, %xmm1 -; SSE-NEXT: pcmpgtq %xmm2, %xmm0 -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl $3, %ecx -; SSE-NEXT: sete %al -; SSE-NEXT: negq %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v4i64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: pand %xmm3, %xmm2 +; SSE2-NEXT: movmskpd %xmm2, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: cmpl $3, %ecx +; SSE2-NEXT: sete %al +; SSE2-NEXT: negq %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v4i64_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 +; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 +; SSE42-NEXT: pand %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: cmpl $3, %ecx +; SSE42-NEXT: sete %al +; SSE42-NEXT: negq %rax +; SSE42-NEXT: retq ; ; AVX1-LABEL: test_v4i64_sext: ; AVX1: # %bb.0: @@ -369,17 +415,44 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { } define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { -; SSE-LABEL: test_v4i64_legal_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm3, %xmm1 -; SSE-NEXT: pcmpgtq %xmm2, %xmm0 -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: cmpl $15, %ecx -; SSE-NEXT: sete %al -; SSE-NEXT: negq %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v4i64_legal_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movmskps %xmm2, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: cmpl $15, %ecx +; SSE2-NEXT: sete %al +; SSE2-NEXT: negq %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v4i64_legal_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 +; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 +; SSE42-NEXT: packssdw %xmm1, %xmm0 +; SSE42-NEXT: movmskps %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: cmpl $15, %ecx +; SSE42-NEXT: sete %al +; SSE42-NEXT: negq %rax +; SSE42-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: ; AVX1: # %bb.0: @@ -1080,16 +1153,32 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { } define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { -; SSE-LABEL: bool_reduction_v2i64: -; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 -; SSE-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: cmpb $3, %al -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v2i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: cmpb $3, %al +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v2i64: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; SSE42-NEXT: pxor %xmm2, %xmm1 +; SSE42-NEXT: pxor %xmm2, %xmm0 +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %eax +; SSE42-NEXT: cmpb $3, %al +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: @@ -1225,15 +1314,40 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { } define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { -; SSE-LABEL: bool_reduction_v4i64: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm1, %xmm3 -; SSE-NEXT: pcmpgtq %xmm0, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: cmpb $15, %al -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v4i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: cmpb $15, %al +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v4i64: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 +; SSE42-NEXT: packssdw %xmm3, %xmm2 +; SSE42-NEXT: movmskps %xmm2, %eax +; SSE42-NEXT: cmpb $15, %al +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v4i64: ; AVX1: # %bb.0: @@ -1275,18 +1389,37 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { } define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { -; SSE-LABEL: bool_reduction_v8i32: -; SSE: # %bb.0: -; SSE-NEXT: pminud %xmm1, %xmm3 -; SSE-NEXT: pcmpeqd %xmm1, %xmm3 -; SSE-NEXT: pminud %xmm0, %xmm2 -; SSE-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: packsswb %xmm2, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: cmpb $-1, %al -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v8i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: pxor %xmm3, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: packsswb %xmm0, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpb $-1, %al +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v8i32: +; SSE42: # %bb.0: +; SSE42-NEXT: pminud %xmm1, %xmm3 +; SSE42-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE42-NEXT: pminud %xmm0, %xmm2 +; SSE42-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE42-NEXT: packssdw %xmm3, %xmm2 +; SSE42-NEXT: packsswb %xmm2, %xmm2 +; SSE42-NEXT: pmovmskb %xmm2, %eax +; SSE42-NEXT: cmpb $-1, %al +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v8i32: ; AVX1: # %bb.0: @@ -1331,14 +1464,24 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { } define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { -; SSE-LABEL: bool_reduction_v16i16: -; SSE: # %bb.0: -; SSE-NEXT: psubb %xmm3, %xmm1 -; SSE-NEXT: psubb %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: ptest %xmm0, %xmm0 -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v16i16: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqb %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpw $-1, %ax +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v16i16: +; SSE42: # %bb.0: +; SSE42-NEXT: psubb %xmm3, %xmm1 +; SSE42-NEXT: psubb %xmm2, %xmm0 +; SSE42-NEXT: por %xmm1, %xmm0 +; SSE42-NEXT: ptest %xmm0, %xmm0 +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v16i16: ; AVX1: # %bb.0: @@ -1381,14 +1524,24 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) { } define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) { -; SSE-LABEL: bool_reduction_v32i8: -; SSE: # %bb.0: -; SSE-NEXT: psubb %xmm3, %xmm1 -; SSE-NEXT: psubb %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: ptest %xmm0, %xmm0 -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v32i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqb %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpw $-1, %ax +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v32i8: +; SSE42: # %bb.0: +; SSE42-NEXT: psubb %xmm3, %xmm1 +; SSE42-NEXT: psubb %xmm2, %xmm0 +; SSE42-NEXT: por %xmm1, %xmm0 +; SSE42-NEXT: ptest %xmm0, %xmm0 +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v32i8: ; AVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index aee0fa8..9e68f4c 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512 define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_v2f64_sext: @@ -254,14 +255,31 @@ define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) { } define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { -; SSE-LABEL: test_v2i64_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: negl %ecx -; SSE-NEXT: sbbq %rax, %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v2i64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movmskpd %xmm1, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: negl %ecx +; SSE2-NEXT: sbbq %rax, %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v2i64_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: negl %ecx +; SSE42-NEXT: sbbq %rax, %rax +; SSE42-NEXT: retq ; ; AVX-LABEL: test_v2i64_sext: ; AVX: # %bb.0: @@ -289,16 +307,42 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { } define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { -; SSE-LABEL: test_v4i64_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm3, %xmm1 -; SSE-NEXT: pcmpgtq %xmm2, %xmm0 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: negl %ecx -; SSE-NEXT: sbbq %rax, %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v4i64_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: por %xmm3, %xmm2 +; SSE2-NEXT: movmskpd %xmm2, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: negl %ecx +; SSE2-NEXT: sbbq %rax, %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v4i64_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 +; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 +; SSE42-NEXT: por %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: negl %ecx +; SSE42-NEXT: sbbq %rax, %rax +; SSE42-NEXT: retq ; ; AVX1-LABEL: test_v4i64_sext: ; AVX1: # %bb.0: @@ -344,16 +388,42 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { } define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { -; SSE-LABEL: test_v4i64_legal_sext: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm3, %xmm1 -; SSE-NEXT: pcmpgtq %xmm2, %xmm0 -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: negl %ecx -; SSE-NEXT: sbbq %rax, %rax -; SSE-NEXT: retq +; SSE2-LABEL: test_v4i64_legal_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm1, %xmm2 +; SSE2-NEXT: por %xmm0, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movmskps %xmm2, %ecx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: negl %ecx +; SSE2-NEXT: sbbq %rax, %rax +; SSE2-NEXT: retq +; +; SSE42-LABEL: test_v4i64_legal_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm3, %xmm1 +; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 +; SSE42-NEXT: packssdw %xmm1, %xmm0 +; SSE42-NEXT: movmskps %xmm0, %ecx +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: negl %ecx +; SSE42-NEXT: sbbq %rax, %rax +; SSE42-NEXT: retq ; ; AVX1-LABEL: test_v4i64_legal_sext: ; AVX1: # %bb.0: @@ -978,16 +1048,32 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { } define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { -; SSE-LABEL: bool_reduction_v2i64: -; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 -; SSE-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v2i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v2i64: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; SSE42-NEXT: pxor %xmm2, %xmm1 +; SSE42-NEXT: pxor %xmm2, %xmm0 +; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: movmskpd %xmm0, %eax +; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: setne %al +; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: @@ -1015,12 +1101,20 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { } define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { -; SSE-LABEL: bool_reduction_v4i32: -; SSE: # %bb.0: -; SSE-NEXT: psubd %xmm1, %xmm0 -; SSE-NEXT: ptest %xmm0, %xmm0 -; SSE-NEXT: setne %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: cmpl $15, %eax +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v4i32: +; SSE42: # %bb.0: +; SSE42-NEXT: psubd %xmm1, %xmm0 +; SSE42-NEXT: ptest %xmm0, %xmm0 +; SSE42-NEXT: setne %al +; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: @@ -1117,15 +1211,40 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) { } define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { -; SSE-LABEL: bool_reduction_v4i64: -; SSE: # %bb.0: -; SSE-NEXT: pcmpgtq %xmm1, %xmm3 -; SSE-NEXT: pcmpgtq %xmm0, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: movmskps %xmm2, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v4i64: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2] +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] +; SSE2-NEXT: pand %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v4i64: +; SSE42: # %bb.0: +; SSE42-NEXT: pcmpgtq %xmm1, %xmm3 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 +; SSE42-NEXT: packssdw %xmm3, %xmm2 +; SSE42-NEXT: movmskps %xmm2, %eax +; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: setne %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v4i64: ; AVX1: # %bb.0: @@ -1167,17 +1286,35 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { } define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) { -; SSE-LABEL: bool_reduction_v8i32: -; SSE: # %bb.0: -; SSE-NEXT: pminud %xmm1, %xmm3 -; SSE-NEXT: pcmpeqd %xmm1, %xmm3 -; SSE-NEXT: pminud %xmm0, %xmm2 -; SSE-NEXT: pcmpeqd %xmm0, %xmm2 -; SSE-NEXT: packssdw %xmm3, %xmm2 -; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: testl %eax, %eax -; SSE-NEXT: setne %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v8i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm4, %xmm3 +; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: pxor %xmm3, %xmm1 +; SSE2-NEXT: pxor %xmm4, %xmm2 +; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v8i32: +; SSE42: # %bb.0: +; SSE42-NEXT: pminud %xmm1, %xmm3 +; SSE42-NEXT: pcmpeqd %xmm1, %xmm3 +; SSE42-NEXT: pminud %xmm0, %xmm2 +; SSE42-NEXT: pcmpeqd %xmm0, %xmm2 +; SSE42-NEXT: packssdw %xmm3, %xmm2 +; SSE42-NEXT: pmovmskb %xmm2, %eax +; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: setne %al +; SSE42-NEXT: retq ; ; AVX1-LABEL: bool_reduction_v8i32: ; AVX1: # %bb.0: