From: Simon Pilgrim Date: Sat, 4 Feb 2023 13:59:57 +0000 (+0000) Subject: [X86] Swap bool reduction predicates in v2i64/v4i32 tests X-Git-Tag: upstream/17.0.6~18592 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c72aeaad28e1206dac007be9bc27e2c266906645;p=platform%2Fupstream%2Fllvm.git [X86] Swap bool reduction predicates in v2i64/v4i32 tests The v2i64 case has more scope for optimization with ne than ugt cases, and we already have very similar icmp sgt coverage via the v4i64 tests --- diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index 581f1d7..e8a3dff 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1155,50 +1155,41 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-LABEL: bool_reduction_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: xorl $3, %eax ; SSE2-NEXT: cmpb $3, %al ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; ; SSE42-LABEL: bool_reduction_v2i64: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; SSE42-NEXT: pxor %xmm2, %xmm1 -; SSE42-NEXT: pxor %xmm2, %xmm0 -; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 +; SSE42-NEXT: pcmpeqq %xmm1, %xmm0 ; SSE42-NEXT: movmskpd %xmm0, %eax +; SSE42-NEXT: xorl $3, %eax ; SSE42-NEXT: cmpb $3, %al ; SSE42-NEXT: sete %al ; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax +; AVX-NEXT: xorl $3, %eax ; AVX-NEXT: cmpb $3, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 +; AVX512-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: cmpb $3, %al ; AVX512-NEXT: sete %al ; AVX512-NEXT: retq - %a = icmp ugt <2 x i64> %x, %y + %a = icmp ne <2 x i64> %x, %y %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> %c = and <2 x i1> %a, %b %d = extractelement <2 x i1> %c, i32 0 @@ -1206,17 +1197,30 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { } define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { -; SSE-LABEL: bool_reduction_v4i32: -; SSE: # %bb.0: -; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorl $15, %eax -; SSE-NEXT: cmpb $15, %al -; SSE-NEXT: sete %al -; SSE-NEXT: retq +; SSE2-LABEL: bool_reduction_v4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: cmpb $15, %al +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE42-LABEL: bool_reduction_v4i32: +; SSE42: # %bb.0: +; SSE42-NEXT: pminud %xmm0, %xmm1 +; SSE42-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE42-NEXT: movmskps %xmm1, %eax +; SSE42-NEXT: xorl $15, %eax +; SSE42-NEXT: cmpb $15, %al +; SSE42-NEXT: sete %al +; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: +; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax ; AVX-NEXT: xorl $15, %eax @@ -1226,12 +1230,12 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; ; AVX512-LABEL: bool_reduction_v4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: cmpb $15, %al ; AVX512-NEXT: sete %al ; AVX512-NEXT: retq - %a = icmp ne <4 x i32> %x, %y + %a = icmp ugt <4 x i32> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = and <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index 9e68f4c..730bf30 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1050,50 +1050,36 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-LABEL: bool_reduction_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] +; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: movmskpd %xmm1, %eax -; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: cmpl $3, %eax ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; ; SSE42-LABEL: bool_reduction_v2i64: ; SSE42: # %bb.0: -; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; SSE42-NEXT: pxor %xmm2, %xmm1 -; SSE42-NEXT: pxor %xmm2, %xmm0 -; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 -; SSE42-NEXT: movmskpd %xmm0, %eax -; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: psubq %xmm1, %xmm0 +; SSE42-NEXT: ptest %xmm0, %xmm0 ; SSE42-NEXT: setne %al ; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vptest %xmm0, %xmm0 ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 +; AVX512-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb %al, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq - %a = icmp ugt <2 x i64> %x, %y + %a = icmp ne <2 x i64> %x, %y %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> %c = or <2 x i1> %a, %b %d = extractelement <2 x i1> %c, i32 0 @@ -1103,14 +1089,18 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE2-LABEL: bool_reduction_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: movmskps %xmm0, %eax -; SSE2-NEXT: cmpl $15, %eax +; SSE2-NEXT: testl %eax, %eax ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; ; SSE42-LABEL: bool_reduction_v4i32: ; SSE42: # %bb.0: +; SSE42-NEXT: pminud %xmm0, %xmm1 ; SSE42-NEXT: psubd %xmm1, %xmm0 ; SSE42-NEXT: ptest %xmm0, %xmm0 ; SSE42-NEXT: setne %al @@ -1118,6 +1108,7 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; ; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: +; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1 ; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vptest %xmm0, %xmm0 ; AVX-NEXT: setne %al @@ -1125,12 +1116,12 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; ; AVX512-LABEL: bool_reduction_v4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; AVX512-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 ; AVX512-NEXT: kmovd %k0, %eax ; AVX512-NEXT: testb %al, %al ; AVX512-NEXT: setne %al ; AVX512-NEXT: retq - %a = icmp ne <4 x i32> %x, %y + %a = icmp ugt <4 x i32> %x, %y %s1 = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> %b = or <4 x i1> %s1, %a %s2 = shufflevector <4 x i1> %b, <4 x i1> undef, <4 x i32>