; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512
define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64_sext:
}
define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: test_v2i64_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: cmpl $3, %ecx
-; SSE-NEXT: sete %al
-; SSE-NEXT: negq %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v2i64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movmskpd %xmm1, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: cmpl $3, %ecx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: negq %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v2i64_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: cmpl $3, %ecx
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: negq %rax
+; SSE42-NEXT: retq
;
; AVX-LABEL: test_v2i64_sext:
; AVX: # %bb.0:
}
define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm3, %xmm1
-; SSE-NEXT: pcmpgtq %xmm2, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: cmpl $3, %ecx
-; SSE-NEXT: sete %al
-; SSE-NEXT: negq %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v4i64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm3, %xmm2
+; SSE2-NEXT: movmskpd %xmm2, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: cmpl $3, %ecx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: negq %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v4i64_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: pand %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: cmpl $3, %ecx
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: negq %rax
+; SSE42-NEXT: retq
;
; AVX1-LABEL: test_v4i64_sext:
; AVX1: # %bb.0:
}
define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_legal_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm3, %xmm1
-; SSE-NEXT: pcmpgtq %xmm2, %xmm0
-; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: cmpl $15, %ecx
-; SSE-NEXT: sete %al
-; SSE-NEXT: negq %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v4i64_legal_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
+; SSE2-NEXT: movmskps %xmm2, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: cmpl $15, %ecx
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: negq %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v4i64_legal_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: packssdw %xmm1, %xmm0
+; SSE42-NEXT: movmskps %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: cmpl $15, %ecx
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: negq %rax
+; SSE42-NEXT: retq
;
; AVX1-LABEL: test_v4i64_legal_sext:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; SSE-LABEL: bool_reduction_v2i64:
-; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: cmpb $3, %al
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm1
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %eax
+; SSE42-NEXT: cmpb $3, %al
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: retq
;
; AVX-LABEL: bool_reduction_v2i64:
; AVX: # %bb.0:
}
define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
-; SSE-LABEL: bool_reduction_v4i64:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm1, %xmm3
-; SSE-NEXT: pcmpgtq %xmm0, %xmm2
-; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: cmpb $15, %al
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v4i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm1
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: cmpb $15, %al
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v4i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: packssdw %xmm3, %xmm2
+; SSE42-NEXT: movmskps %xmm2, %eax
+; SSE42-NEXT: cmpb $15, %al
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v4i64:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; SSE-LABEL: bool_reduction_v8i32:
-; SSE: # %bb.0:
-; SSE-NEXT: pminud %xmm1, %xmm3
-; SSE-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE-NEXT: pminud %xmm0, %xmm2
-; SSE-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: packsswb %xmm2, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: cmpb $-1, %al
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v8i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: packsswb %xmm0, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpb $-1, %al
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v8i32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pminud %xmm1, %xmm3
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm3
+; SSE42-NEXT: pminud %xmm0, %xmm2
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE42-NEXT: packssdw %xmm3, %xmm2
+; SSE42-NEXT: packsswb %xmm2, %xmm2
+; SSE42-NEXT: pmovmskb %xmm2, %eax
+; SSE42-NEXT: cmpb $-1, %al
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v8i32:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
-; SSE-LABEL: bool_reduction_v16i16:
-; SSE: # %bb.0:
-; SSE-NEXT: psubb %xmm3, %xmm1
-; SSE-NEXT: psubb %xmm2, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: ptest %xmm0, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v16i16:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpw $-1, %ax
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v16i16:
+; SSE42: # %bb.0:
+; SSE42-NEXT: psubb %xmm3, %xmm1
+; SSE42-NEXT: psubb %xmm2, %xmm0
+; SSE42-NEXT: por %xmm1, %xmm0
+; SSE42-NEXT: ptest %xmm0, %xmm0
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v16i16:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
-; SSE-LABEL: bool_reduction_v32i8:
-; SSE: # %bb.0:
-; SSE-NEXT: psubb %xmm3, %xmm1
-; SSE-NEXT: psubb %xmm2, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: ptest %xmm0, %xmm0
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v32i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpw $-1, %ax
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v32i8:
+; SSE42: # %bb.0:
+; SSE42-NEXT: psubb %xmm3, %xmm1
+; SSE42-NEXT: psubb %xmm2, %xmm0
+; SSE42-NEXT: por %xmm1, %xmm0
+; SSE42-NEXT: ptest %xmm0, %xmm0
+; SSE42-NEXT: sete %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v32i8:
; AVX1: # %bb.0:
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512
define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64_sext:
}
define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: test_v2i64_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: negl %ecx
-; SSE-NEXT: sbbq %rax, %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v2i64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movmskpd %xmm1, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: negl %ecx
+; SSE2-NEXT: sbbq %rax, %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v2i64_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: negl %ecx
+; SSE42-NEXT: sbbq %rax, %rax
+; SSE42-NEXT: retq
;
; AVX-LABEL: test_v2i64_sext:
; AVX: # %bb.0:
}
define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm3, %xmm1
-; SSE-NEXT: pcmpgtq %xmm2, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: negl %ecx
-; SSE-NEXT: sbbq %rax, %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v4i64_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movmskpd %xmm2, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: negl %ecx
+; SSE2-NEXT: sbbq %rax, %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v4i64_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: por %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: negl %ecx
+; SSE42-NEXT: sbbq %rax, %rax
+; SSE42-NEXT: retq
;
; AVX1-LABEL: test_v4i64_sext:
; AVX1: # %bb.0:
}
define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
-; SSE-LABEL: test_v4i64_legal_sext:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm3, %xmm1
-; SSE-NEXT: pcmpgtq %xmm2, %xmm0
-; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: negl %ecx
-; SSE-NEXT: sbbq %rax, %rax
-; SSE-NEXT: retq
+; SSE2-LABEL: test_v4i64_legal_sext:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
+; SSE2-NEXT: movmskps %xmm2, %ecx
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: negl %ecx
+; SSE2-NEXT: sbbq %rax, %rax
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_v4i64_legal_sext:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: packssdw %xmm1, %xmm0
+; SSE42-NEXT: movmskps %xmm0, %ecx
+; SSE42-NEXT: xorl %eax, %eax
+; SSE42-NEXT: negl %ecx
+; SSE42-NEXT: sbbq %rax, %rax
+; SSE42-NEXT: retq
;
; AVX1-LABEL: test_v4i64_legal_sext:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
-; SSE-LABEL: bool_reduction_v2i64:
-; SSE: # %bb.0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: testl %eax, %eax
-; SSE-NEXT: setne %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm1
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: movmskpd %xmm0, %eax
+; SSE42-NEXT: testl %eax, %eax
+; SSE42-NEXT: setne %al
+; SSE42-NEXT: retq
;
; AVX-LABEL: bool_reduction_v2i64:
; AVX: # %bb.0:
}
define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; SSE-LABEL: bool_reduction_v4i32:
-; SSE: # %bb.0:
-; SSE-NEXT: psubd %xmm1, %xmm0
-; SSE-NEXT: ptest %xmm0, %xmm0
-; SSE-NEXT: setne %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v4i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: cmpl $15, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v4i32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: psubd %xmm1, %xmm0
+; SSE42-NEXT: ptest %xmm0, %xmm0
+; SSE42-NEXT: setne %al
+; SSE42-NEXT: retq
;
; AVX-LABEL: bool_reduction_v4i32:
; AVX: # %bb.0:
}
define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
-; SSE-LABEL: bool_reduction_v4i64:
-; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtq %xmm1, %xmm3
-; SSE-NEXT: pcmpgtq %xmm0, %xmm2
-; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: testl %eax, %eax
-; SSE-NEXT: setne %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v4i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2]
+; SSE2-NEXT: pand %xmm5, %xmm1
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v4i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: packssdw %xmm3, %xmm2
+; SSE42-NEXT: movmskps %xmm2, %eax
+; SSE42-NEXT: testl %eax, %eax
+; SSE42-NEXT: setne %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v4i64:
; AVX1: # %bb.0:
}
define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
-; SSE-LABEL: bool_reduction_v8i32:
-; SSE: # %bb.0:
-; SSE-NEXT: pminud %xmm1, %xmm3
-; SSE-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE-NEXT: pminud %xmm0, %xmm2
-; SSE-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: testl %eax, %eax
-; SSE-NEXT: setne %al
-; SSE-NEXT: retq
+; SSE2-LABEL: bool_reduction_v8i32:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm1
+; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: testl %eax, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: bool_reduction_v8i32:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pminud %xmm1, %xmm3
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm3
+; SSE42-NEXT: pminud %xmm0, %xmm2
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm2
+; SSE42-NEXT: packssdw %xmm3, %xmm2
+; SSE42-NEXT: pmovmskb %xmm2, %eax
+; SSE42-NEXT: testl %eax, %eax
+; SSE42-NEXT: setne %al
+; SSE42-NEXT: retq
;
; AVX1-LABEL: bool_reduction_v8i32:
; AVX1: # %bb.0: