From bc81f5a9733a58e932dfbbf16ee352ed294f6baa Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 22 Apr 2023 17:00:35 +0100 Subject: [PATCH] [X86] combineSetCCMOVMSK - fold allof movmskps/movmskpd -> testps/testpd Fixes #60007 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 +- llvm/test/CodeGen/X86/combine-movmsk-avx.ll | 28 ++- llvm/test/CodeGen/X86/combine-movmsk.ll | 25 ++- llvm/test/CodeGen/X86/movmsk-cmp.ll | 188 ++++++++++-------- llvm/test/CodeGen/X86/vector-compare-all_of.ll | 263 ++++++++++++++----------- 5 files changed, 306 insertions(+), 212 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7a86d5b..e07b597 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47665,13 +47665,21 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, // MOVMSKPS(V) !=/== 0 -> TESTPS(V,V) // MOVMSKPD(V) !=/== 0 -> TESTPD(V,V) + // MOVMSKPS(V) !=/== -1 -> TESTPS(V,-1) + // MOVMSKPD(V) !=/== -1 -> TESTPD(V,-1) // iff every element is referenced. - if (NumElts <= CmpBits && IsAnyOf && Subtarget.hasAVX() && IsOneUse && + if (NumElts <= CmpBits && Subtarget.hasAVX() && IsOneUse && (NumEltBits == 32 || NumEltBits == 64)) { + SDLoc DL(EFLAGS); MVT FloatSVT = MVT::getFloatingPointVT(NumEltBits); MVT FloatVT = MVT::getVectorVT(FloatSVT, NumElts); - SDValue V = DAG.getBitcast(FloatVT, Vec); - return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), MVT::i32, V, V); + MVT IntVT = FloatVT.changeVectorElementTypeToInteger(); + SDValue LHS = Vec; + SDValue RHS = IsAnyOf ? Vec : DAG.getAllOnesConstant(DL, IntVT); + CC = IsAnyOf ? CC : (CC == X86::COND_E ? 
X86::COND_B : X86::COND_AE); + return DAG.getNode(X86ISD::TESTP, DL, MVT::i32, + DAG.getBitcast(FloatVT, LHS), + DAG.getBitcast(FloatVT, RHS)); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll index 70774ec..8e09b4e 100644 --- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll +++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll @@ -25,15 +25,25 @@ define i1 @movmskps_noneof_bitcast_v4f64(<4 x double> %a0) { } define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) { -; CHECK-LABEL: movmskps_allof_bitcast_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: vmovmskpd %ymm0, %eax -; CHECK-NEXT: cmpl $15, %eax -; CHECK-NEXT: sete %al -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; AVX1-LABEL: movmskps_allof_bitcast_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestpd %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: movmskps_allof_bitcast_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq %1 = fcmp oeq <4 x double> %a0, zeroinitializer %2 = sext <4 x i1> %1 to <4 x i64> %3 = bitcast <4 x i64> %2 to <8 x float> diff --git a/llvm/test/CodeGen/X86/combine-movmsk.ll b/llvm/test/CodeGen/X86/combine-movmsk.ll index 9ad3c16..d6105f5 100644 --- a/llvm/test/CodeGen/X86/combine-movmsk.ll +++ b/llvm/test/CodeGen/X86/combine-movmsk.ll @@ -49,9 +49,9 @@ define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpl $3, %eax -; 
AVX-NEXT: sete %al +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestpd %xmm1, %xmm0 +; AVX-NEXT: setb %al ; AVX-NEXT: retq %1 = fcmp oeq <2 x double> zeroinitializer, %a0 %2 = sext <2 x i1> %1 to <2 x i64> @@ -108,9 +108,9 @@ define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) { ; ; AVX-LABEL: pmovmskb_allof_bitcast_v2i64: ; AVX: # %bb.0: -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: cmpl $3, %eax -; AVX-NEXT: sete %al +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestpd %xmm1, %xmm0 +; AVX-NEXT: setb %al ; AVX-NEXT: retq %1 = icmp sgt <2 x i64> zeroinitializer, %a0 %2 = sext <2 x i1> %1 to <2 x i64> @@ -159,9 +159,9 @@ define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: cmpl $15, %eax -; AVX-NEXT: sete %al +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestps %xmm1, %xmm0 +; AVX-NEXT: setb %al ; AVX-NEXT: retq %1 = fcmp oeq <4 x float> %a0, zeroinitializer %2 = sext <4 x i1> %1 to <4 x i32> @@ -395,11 +395,10 @@ define i32 @movmskps_ptest_numelts_mismatch(<16 x i8> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %ecx +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $15, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negl %eax +; AVX-NEXT: vtestps %xmm1, %xmm0 +; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq %1 = icmp eq <16 x i8> %a0, zeroinitializer %2 = sext <16 x i1> %1 to <16 x i8> diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index 8473e99..708d3cd 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -555,9 +555,9 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) { ; ; AVX-LABEL: allones_v4i32_sign: ; AVX: # %bb.0: -; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: cmpb $15, %al -; 
AVX-NEXT: sete %al +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestps %xmm1, %xmm0 +; AVX-NEXT: setb %al ; AVX-NEXT: retq %tmp = icmp slt <4 x i32> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 @@ -594,13 +594,30 @@ define i1 @allones_v8i32_sign(<8 x i32> %arg) { ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; -; AVX-LABEL: allones_v8i32_sign: -; AVX: # %bb.0: -; AVX-NEXT: vmovmskps %ymm0, %eax -; AVX-NEXT: cmpb $-1, %al -; AVX-NEXT: sete %al -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: allones_v8i32_sign: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestps %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allones_v8i32_sign: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: allones_v8i32_sign: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestps %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = icmp slt <8 x i32> %arg, zeroinitializer %tmp1 = bitcast <8 x i1> %tmp to i8 %tmp2 = icmp eq i8 %tmp1, -1 @@ -753,13 +770,30 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) { ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; -; AVX-LABEL: allones_v4i64_sign: -; AVX: # %bb.0: -; AVX-NEXT: vmovmskpd %ymm0, %eax -; AVX-NEXT: cmpb $15, %al -; AVX-NEXT: sete %al -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: allones_v4i64_sign: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestpd %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allones_v4i64_sign: +; AVX2: # %bb.0: +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: 
retq +; +; AVX512-LABEL: allones_v4i64_sign: +; AVX512: # %bb.0: +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestpd %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %tmp = icmp slt <4 x i64> %arg, zeroinitializer %tmp1 = bitcast <4 x i1> %tmp to i4 %tmp2 = icmp eq i4 %tmp1, -1 @@ -806,18 +840,18 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: allones_v8i64_sign: ; AVX2: # %bb.0: ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: cmpb $-1, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -1592,9 +1626,9 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) { ; AVX1OR2-LABEL: allones_v4i32_and1: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskps %xmm0, %eax -; AVX1OR2-NEXT: cmpb $15, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v4i32_and1: @@ -1679,18 +1713,18 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: allones_v8i32_and1: ; AVX2: # %bb.0: ; 
AVX2-NEXT: vpslld $31, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: cmpb $-1, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -1887,9 +1921,9 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) { ; AVX1OR2-LABEL: allones_v2i64_and1: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax -; AVX1OR2-NEXT: cmpb $3, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v2i64_and1: @@ -1973,18 +2007,18 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskpd %xmm0, %eax -; AVX1-NEXT: cmpl $3, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestpd %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: allones_v4i64_and1: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: cmpb $15, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2086,9 +2120,9 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) { ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -2097,9 +2131,9 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) { ; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1 ; AVX2-NEXT: vpsllq $63, 
%ymm0, %ymm0 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: cmpb $-1, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2738,9 +2772,9 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) { ; AVX1OR2-LABEL: allones_v4i32_and4: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vpslld $29, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskps %xmm0, %eax -; AVX1OR2-NEXT: cmpb $15, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v4i32_and4: @@ -2825,18 +2859,18 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpslld $29, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: allones_v8i32_and4: ; AVX2: # %bb.0: ; AVX2-NEXT: vpslld $29, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: cmpb $-1, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -3033,9 +3067,9 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) { ; AVX1OR2-LABEL: allones_v2i64_and4: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vpsllq $61, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax -; AVX1OR2-NEXT: cmpb $3, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: allones_v2i64_and4: @@ -3119,18 +3153,18 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) { ; AVX1-NEXT: vextractf128 $1, 
%ymm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskpd %xmm0, %eax -; AVX1-NEXT: cmpl $3, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestpd %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: allones_v4i64_and4: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: cmpb $15, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -3232,9 +3266,9 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) { ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax -; AVX1-NEXT: cmpl $15, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -3243,9 +3277,9 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) { ; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1 ; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0 ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: cmpb $-1, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -3827,9 +3861,9 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) { ; AVX1OR2-LABEL: movmsk_and_v2f64: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 -; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax -; AVX1OR2-NEXT: cmpb $3, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: movmsk_and_v2f64: @@ -4257,11 +4291,11 @@ define i32 @PR39665_c_ray_opt(<2 x double> 
%x, <2 x double> %y) { ; AVX1OR2-LABEL: PR39665_c_ray_opt: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax -; AVX1OR2-NEXT: cmpb $3, %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 ; AVX1OR2-NEXT: movl $42, %ecx ; AVX1OR2-NEXT: movl $99, %eax -; AVX1OR2-NEXT: cmovel %ecx, %eax +; AVX1OR2-NEXT: cmovbl %ecx, %eax ; AVX1OR2-NEXT: retq ; ; KNL-LABEL: PR39665_c_ray_opt: diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index 66ada15..8892134 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -19,11 +19,10 @@ define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_v2f64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %ecx ; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $3, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negq %rax +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestpd %xmm1, %xmm0 +; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq %c = fcmp ogt <2 x double> %a0, %a1 %s = sext <2 x i1> %c to <2 x i64> @@ -46,16 +45,36 @@ define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) { ; SSE-NEXT: negq %rax ; SSE-NEXT: retq ; -; AVX-LABEL: test_v4f64_sext: -; AVX: # %bb.0: -; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskpd %ymm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $15, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negq %rax -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: test_v4f64_sext: +; AVX1: # %bb.0: +; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestpd %ymm1, %ymm0 +; AVX1-NEXT: sbbq %rax, %rax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v4f64_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vcmpltpd 
%ymm0, %ymm1, %ymm0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: sbbq %rax, %rax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_v4f64_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestpd %ymm1, %ymm0 +; AVX512-NEXT: sbbq %rax, %rax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %c = fcmp ogt <4 x double> %a0, %a1 %s = sext <4 x i1> %c to <4 x i64> %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> @@ -84,11 +103,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { ; AVX1OR2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskps %xmm0, %ecx +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1OR2-NEXT: xorl %eax, %eax -; AVX1OR2-NEXT: cmpl $15, %ecx -; AVX1OR2-NEXT: sete %al -; AVX1OR2-NEXT: negq %rax +; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 +; AVX1OR2-NEXT: sbbq %rax, %rax ; AVX1OR2-NEXT: vzeroupper ; AVX1OR2-NEXT: retq ; @@ -96,12 +114,10 @@ define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: cmpl $15, %ecx -; AVX512-NEXT: sete %al -; AVX512-NEXT: negq %rax +; AVX512-NEXT: vtestps %xmm0, %xmm1 +; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = fcmp ogt <4 x double> %a0, %a1 @@ -129,11 +145,10 @@ define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_v4f32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %ecx ; AVX-NEXT: 
xorl %eax, %eax -; AVX-NEXT: cmpl $15, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negl %eax +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestps %xmm1, %xmm0 +; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq %c = fcmp ogt <4 x float> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> @@ -158,16 +173,36 @@ define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) { ; SSE-NEXT: negl %eax ; SSE-NEXT: retq ; -; AVX-LABEL: test_v8f32_sext: -; AVX: # %bb.0: -; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 -; AVX-NEXT: vmovmskps %ymm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $255, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negl %eax -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: test_v8f32_sext: +; AVX1: # %bb.0: +; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: xorl %eax, %eax +; AVX1-NEXT: vtestps %ymm1, %ymm0 +; AVX1-NEXT: sbbl %eax, %eax +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_v8f32_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: sbbl %eax, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: test_v8f32_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestps %ymm1, %ymm0 +; AVX512-NEXT: sbbl %eax, %eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %c = fcmp ogt <8 x float> %a0, %a1 %s = sext <8 x i1> %c to <8 x i32> %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> @@ -262,11 +297,10 @@ define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-LABEL: test_v2i64_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskpd %xmm0, %ecx ; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $3, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negq 
%rax +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestpd %xmm1, %xmm0 +; AVX-NEXT: sbbq %rax, %rax ; AVX-NEXT: retq %c = icmp sgt <2 x i64> %a0, %a1 %s = sext <2 x i1> %c to <2 x i64> @@ -323,33 +357,30 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) { ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskpd %xmm0, %ecx ; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl $3, %ecx -; AVX1-NEXT: sete %al -; AVX1-NEXT: negq %rax +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestpd %xmm1, %xmm0 +; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4i64_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %ecx ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl $15, %ecx -; AVX2-NEXT: sete %al -; AVX2-NEXT: negq %rax +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v4i64_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vmovmskpd %ymm0, %ecx ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: cmpl $15, %ecx -; AVX512-NEXT: sete %al -; AVX512-NEXT: negq %rax +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestpd %ymm1, %ymm0 +; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <4 x i64> %a0, %a1 @@ -409,11 +440,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %ecx +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl $15, %ecx -; AVX1-NEXT: sete %al -; AVX1-NEXT: negq %rax +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: sbbq %rax, %rax ; AVX1-NEXT: 
vzeroupper ; AVX1-NEXT: retq ; @@ -422,11 +452,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovmskps %xmm0, %ecx +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl $15, %ecx -; AVX2-NEXT: sete %al -; AVX2-NEXT: negq %rax +; AVX2-NEXT: vtestps %xmm1, %xmm0 +; AVX2-NEXT: sbbq %rax, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -434,12 +463,10 @@ define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) { ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512-NEXT: vmovmskps %xmm0, %ecx +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z} ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: cmpl $15, %ecx -; AVX512-NEXT: sete %al -; AVX512-NEXT: negq %rax +; AVX512-NEXT: vtestps %xmm0, %xmm1 +; AVX512-NEXT: sbbq %rax, %rax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <4 x i64> %a0, %a1 @@ -467,11 +494,10 @@ define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) { ; AVX-LABEL: test_v4i32_sext: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovmskps %xmm0, %ecx ; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: cmpl $15, %ecx -; AVX-NEXT: sete %al -; AVX-NEXT: negl %eax +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vtestps %xmm1, %xmm0 +; AVX-NEXT: sbbl %eax, %eax ; AVX-NEXT: retq %c = icmp sgt <4 x i32> %a0, %a1 %s = sext <4 x i1> %c to <4 x i32> @@ -503,33 +529,30 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) { ; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %ecx ; AVX1-NEXT: xorl %eax, %eax -; AVX1-NEXT: cmpl $15, %ecx -; AVX1-NEXT: sete %al -; AVX1-NEXT: negl %eax +; AVX1-NEXT: vpcmpeqd 
%xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestps %xmm1, %xmm0 +; AVX1-NEXT: sbbl %eax, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i32_sext: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovmskps %ymm0, %ecx ; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: cmpl $255, %ecx -; AVX2-NEXT: sete %al -; AVX2-NEXT: negl %eax +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: sbbl %eax, %eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_v8i32_sext: ; AVX512: # %bb.0: ; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vmovmskps %ymm0, %ecx ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: cmpl $255, %ecx -; AVX512-NEXT: sete %al -; AVX512-NEXT: negl %eax +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vtestps %ymm1, %ymm0 +; AVX512-NEXT: sbbl %eax, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %c = icmp sgt <8 x i32> %a0, %a1 @@ -941,9 +964,9 @@ define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) { ; AVX1OR2-LABEL: bool_reduction_v2f64: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax -; AVX1OR2-NEXT: cmpb $3, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestpd %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v2f64: @@ -972,9 +995,9 @@ define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) { ; AVX1OR2-LABEL: bool_reduction_v4f32: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; AVX1OR2-NEXT: vmovmskps %xmm0, %eax -; AVX1OR2-NEXT: cmpb $15, %al -; AVX1OR2-NEXT: sete %al +; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1OR2-NEXT: vtestps %xmm1, %xmm0 +; AVX1OR2-NEXT: setb %al ; AVX1OR2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4f32: @@ -1004,14 +1027,24 @@ define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) { ; SSE-NEXT: sete %al ; SSE-NEXT: retq 
; -; AVX1OR2-LABEL: bool_reduction_v4f64: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 -; AVX1OR2-NEXT: vmovmskpd %ymm0, %eax -; AVX1OR2-NEXT: cmpb $15, %al -; AVX1OR2-NEXT: sete %al -; AVX1OR2-NEXT: vzeroupper -; AVX1OR2-NEXT: retq +; AVX1-LABEL: bool_reduction_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestpd %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vcmplepd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v4f64: ; AVX512: # %bb.0: @@ -1042,14 +1075,24 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; -; AVX1OR2-LABEL: bool_reduction_v8f32: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 -; AVX1OR2-NEXT: vmovmskps %ymm0, %eax -; AVX1OR2-NEXT: cmpb $-1, %al -; AVX1OR2-NEXT: sete %al -; AVX1OR2-NEXT: vzeroupper -; AVX1OR2-NEXT: retq +; AVX1-LABEL: bool_reduction_v8f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vtestps %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: bool_reduction_v8f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vcmpneqps %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestps %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; ; AVX512-LABEL: bool_reduction_v8f32: ; AVX512: # %bb.0: @@ -1278,18 +1321,18 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) { ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; 
AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovmskpd %xmm0, %eax -; AVX1-NEXT: cmpl $3, %eax -; AVX1-NEXT: sete %al +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vtestpd %xmm1, %xmm0 +; AVX1-NEXT: setb %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-LABEL: bool_reduction_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: cmpb $15, %al -; AVX2-NEXT: sete %al +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vtestpd %ymm1, %ymm0 +; AVX2-NEXT: setb %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -- 2.7.4