From: Craig Topper Date: Sun, 8 Sep 2019 00:43:52 +0000 (+0000) Subject: [X86] Make getZeroVector return floating point vectors in their native type on SSE2... X-Git-Tag: llvmorg-11-init~9739 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=37dd59298fd46e28ae2b2569465c5195d5708a0a;p=platform%2Fupstream%2Fllvm.git [X86] Make getZeroVector return floating point vectors in their native type on SSE2 and later. isel used to require zero vectors to be canonicalized to a single type to minimize the number of patterns needed to match. This is no longer required. I plan to do this to integers too, but floating point was simpler to start with. Integer has a complication where v32i16/v64i8 aren't legal when the other 512-bit integer types are. llvm-svn: 371325 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8e45651..552c917 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5403,6 +5403,8 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, SDValue Vec; if (!Subtarget.hasSSE2() && VT.is128BitVector()) { Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); + } else if (VT.isFloatingPoint()) { + Vec = DAG.getConstantFP(+0.0, dl, VT); } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 31a46e6..94108402 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -412,6 +412,11 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", [(set VR512:$dst, (v16i32 immAllOnesV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>; +def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; +} + // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. @@ -436,6 +441,13 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "", [(set VR256X:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [HasAVX512] in { +def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>; +def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>; +} + // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index b3c8898..1626228 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -128,13 +128,15 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, SchedRW = [WriteZero] in { + isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -let Predicates = [NoAVX512] in +let Predicates = [NoAVX512] in { def : Pat<(v4i32 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +} // The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, @@ -147,6 +149,11 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllZerosV))]>; } +let Predicates = [NoAVX512] in { +def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; +def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; +} + // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, diff --git a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll index afa8bf4..2590a27 100644 --- a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -9,17 +9,17 @@ define void @func(<4 x float> %a, <16 x i8> %b, <16 x i8> %c, <8 x float> %d, <8 ; CHECK: ## %bb.0: ; CHECK-NEXT: vmovdqu 0, %xmm0 ; CHECK-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3] -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: vmulps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vaddps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmulps %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vaddps %ymm0, %ymm0, %ymm0 ; CHECK-NEXT: vhaddps %ymm4, %ymm0, %ymm0 ; CHECK-NEXT: vsubps %ymm0, %ymm0, %ymm0 -; CHECK-NEXT: vhaddps %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: vhaddps %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: vmovaps %ymm0, (%rdi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx-basic.ll b/llvm/test/CodeGen/X86/avx-basic.ll index b85fd4e..67b0291 100644 --- a/llvm/test/CodeGen/X86/avx-basic.ll +++ b/llvm/test/CodeGen/X86/avx-basic.ll @@ -19,8 +19,8 @@ define void @zero128() nounwind ssp { define void @zero256() nounwind ssp { ; CHECK-LABEL: zero256: ; CHECK: ## %bb.0: -; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax ; CHECK-NEXT: vmovaps %ymm0, (%rax) ; CHECK-NEXT: movq _y@{{.*}}(%rip), %rax ; CHECK-NEXT: vmovaps %ymm0, (%rax) diff --git a/llvm/test/CodeGen/X86/avx2-gather.ll b/llvm/test/CodeGen/X86/avx2-gather.ll index 23a8c9e..cf4a624 100644 --- a/llvm/test/CodeGen/X86/avx2-gather.ll +++ b/llvm/test/CodeGen/X86/avx2-gather.ll @@ -124,16 +124,16 @@ define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_i32gather_pd: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; X32-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1 ; X32-NEXT: vmovapd %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm_i32gather_pd: ; X64: # %bb.0: -; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X64-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1 ; X64-NEXT: vmovapd %xmm1, %xmm0 ; X64-NEXT: retq diff 
--git a/llvm/test/CodeGen/X86/nontemporal-3.ll b/llvm/test/CodeGen/X86/nontemporal-3.ll index f0a2f6f..457e5e1 100644 --- a/llvm/test/CodeGen/X86/nontemporal-3.ll +++ b/llvm/test/CodeGen/X86/nontemporal-3.ll @@ -37,27 +37,12 @@ define void @test_zero_v2f64_align1(<2 x double>* %dst) nounwind { } define void @test_zero_v4f32_align1(<4 x float>* %dst) nounwind { -; SSE2-LABEL: test_zero_v4f32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_zero_v4f32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, 8(%rdi) -; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_zero_v4f32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq +; SSE-LABEL: test_zero_v4f32_align1: +; SSE: # %bb.0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movntiq %rax, 8(%rdi) +; SSE-NEXT: movntiq %rax, (%rdi) +; SSE-NEXT: retq ; ; AVX-LABEL: test_zero_v4f32_align1: ; AVX: # %bb.0: @@ -77,27 +62,12 @@ define void @test_zero_v4f32_align1(<4 x float>* %dst) nounwind { } define void @test_zero_v2i64_align1(<2 x i64>* %dst) nounwind { -; SSE2-LABEL: test_zero_v2i64_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_zero_v2i64_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, 8(%rdi) -; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_zero_v2i64_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq +; SSE-LABEL: test_zero_v2i64_align1: +; SSE: # %bb.0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movntiq %rax, 8(%rdi) +; SSE-NEXT: movntiq %rax, (%rdi) +; SSE-NEXT: retq ; ; AVX-LABEL: test_zero_v2i64_align1: ; AVX: # %bb.0: @@ -117,27 +87,12 @@ define void @test_zero_v2i64_align1(<2 x i64>* %dst) nounwind { } define void @test_zero_v4i32_align1(<4 x i32>* %dst) nounwind { -; SSE2-LABEL: test_zero_v4i32_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_zero_v4i32_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, 8(%rdi) -; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_zero_v4i32_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq +; SSE-LABEL: test_zero_v4i32_align1: +; SSE: # %bb.0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movntiq %rax, 8(%rdi) +; SSE-NEXT: movntiq %rax, (%rdi) +; SSE-NEXT: retq ; ; AVX-LABEL: test_zero_v4i32_align1: ; AVX: # %bb.0: @@ -157,27 +112,12 @@ define void @test_zero_v4i32_align1(<4 x i32>* %dst) nounwind { } define void @test_zero_v8i16_align1(<8 x i16>* %dst) nounwind { -; SSE2-LABEL: test_zero_v8i16_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_zero_v8i16_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, 8(%rdi) 
-; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_zero_v8i16_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq +; SSE-LABEL: test_zero_v8i16_align1: +; SSE: # %bb.0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movntiq %rax, 8(%rdi) +; SSE-NEXT: movntiq %rax, (%rdi) +; SSE-NEXT: retq ; ; AVX-LABEL: test_zero_v8i16_align1: ; AVX: # %bb.0: @@ -197,27 +137,12 @@ define void @test_zero_v8i16_align1(<8 x i16>* %dst) nounwind { } define void @test_zero_v16i8_align1(<16 x i8>* %dst) nounwind { -; SSE2-LABEL: test_zero_v16i8_align1: -; SSE2: # %bb.0: -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: movntiq %rax, 8(%rdi) -; SSE2-NEXT: movntiq %rax, (%rdi) -; SSE2-NEXT: retq -; -; SSE4A-LABEL: test_zero_v16i8_align1: -; SSE4A: # %bb.0: -; SSE4A-NEXT: xorl %eax, %eax -; SSE4A-NEXT: movntiq %rax, 8(%rdi) -; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: retq -; -; SSE41-LABEL: test_zero_v16i8_align1: -; SSE41: # %bb.0: -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: movntiq %rax, 8(%rdi) -; SSE41-NEXT: movntiq %rax, (%rdi) -; SSE41-NEXT: retq +; SSE-LABEL: test_zero_v16i8_align1: +; SSE: # %bb.0: +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movntiq %rax, 8(%rdi) +; SSE-NEXT: movntiq %rax, (%rdi) +; SSE-NEXT: retq ; ; AVX-LABEL: test_zero_v16i8_align1: ; AVX: # %bb.0: @@ -281,10 +206,11 @@ define void @test_zero_v8f32_align1(<8 x float>* %dst) nounwind { ; ; SSE4A-LABEL: test_zero_v8f32_align1: ; SSE4A: # %bb.0: +; SSE4A-NEXT: xorl %eax, %eax +; SSE4A-NEXT: movntiq %rax, 8(%rdi) +; SSE4A-NEXT: movntiq %rax, 24(%rdi) ; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) ; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) ; SSE4A-NEXT: retq ; @@ -725,14 +651,15 @@ define void @test_zero_v16f32_align1(<16 x float>* %dst) nounwind { ; ; SSE4A-LABEL: test_zero_v16f32_align1: ; SSE4A: # %bb.0: +; SSE4A-NEXT: xorl %eax, %eax +; SSE4A-NEXT: movntiq %rax, 24(%rdi) +; SSE4A-NEXT: movntiq %rax, 8(%rdi) +; SSE4A-NEXT: movntiq %rax, 56(%rdi) +; SSE4A-NEXT: movntiq %rax, 40(%rdi) ; SSE4A-NEXT: xorps %xmm0, %xmm0 -; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) -; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) ; SSE4A-NEXT: movntsd %xmm0, (%rdi) -; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) -; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) ; SSE4A-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll index e3bd9d9..c0fa42e 100644 --- a/llvm/test/CodeGen/X86/packss.ll +++ b/llvm/test/CodeGen/X86/packss.ll @@ -356,18 +356,18 @@ define <32 x i8> @packsswb_icmp_zero_trunc_256(<16 x i16> %a0) { ; ; AVX1-LABEL: packsswb_icmp_zero_trunc_256: ; AVX1: # %bb.0: -; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = zero,zero,ymm0[0,1] -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 -; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = zero,zero,ymm0[0,1] +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: ret{{[l|q]}} ; diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll index 1f55030..259c19c 100644 --- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll @@ -64,8 +64,8 @@ define i16 @test1(float %f) nounwind { ; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0 ; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0 +; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X32_AVX512-NEXT: vcvttss2si %xmm0, %eax ; X32_AVX512-NEXT: ## kill: def $ax killed $ax killed $eax @@ -77,8 +77,8 @@ define i16 @test1(float %f) nounwind { ; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64_AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X64_AVX512-NEXT: vcvttss2si %xmm0, %eax ; X64_AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
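
---

Note for readers skimming the patch: below is a trimmed sketch of the getZeroVector logic in X86ISelLowering.cpp as it stands after this change. It is assembled from the diff context above and is not a compilable excerpt; the branches that this commit does not touch are only summarized in comments.

    // Trimmed sketch of getZeroVector after this patch (elided branches are
    // summarized; see X86ISelLowering.cpp for the full function).
    SDValue Vec;
    if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
      // Without SSE2 the only legal 128-bit vector type is v4f32.
      Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
    } else if (VT.isFloatingPoint()) {
      // New in this commit: floating point vector types keep their native
      // type instead of being canonicalized for isel. The added TableGen
      // patterns map the resulting (vNfM immAllZerosV) nodes onto the
      // existing V_SET0 / AVX_SET0 / AVX512_{128,256,512}_SET0 pseudos.
      Vec = DAG.getConstantFP(+0.0, dl, VT);
    } else if (VT.getVectorElementType() == MVT::i1) {
      // ... mask-register (vXi1) zero handling, unchanged ...
    } else {
      // ... integer vectors are still canonicalized, to be handled in a
      // follow-up (v32i16/v64i8 legality complicates the 512-bit case) ...
    }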