From c6735aecfa750509ae0dfb223f9d458fe4f81b51 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 6 Aug 2019 11:00:34 +0000
Subject: [PATCH] [X86][SSE] Enable min/max partial reduction

As mentioned on D65047 / rL366933 the plan is to enable partial reduction handling wherever possible.

llvm-svn: 368016
---
 llvm/lib/Target/X86/X86ISelLowering.cpp         |   2 +-
 llvm/test/CodeGen/X86/horizontal-reduce-smax.ll |  80 +++-----
 llvm/test/CodeGen/X86/horizontal-reduce-smin.ll |  80 +++-----
 llvm/test/CodeGen/X86/horizontal-reduce-umax.ll | 232 ++++++++++++++++--------
 llvm/test/CodeGen/X86/horizontal-reduce-umin.ll |  56 +-----
 5 files changed, 219 insertions(+), 231 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 939fecf..66d4af1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35384,7 +35384,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
   // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
   ISD::NodeType BinOp;
   SDValue Src = DAG.matchBinOpReduction(
-      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
+      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
   if (!Src)
     return SDValue();
 
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index 32fee3a..9419aff 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -1859,13 +1859,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1894,13 +1891,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -1942,13 +1936,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1977,13 +1968,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2047,15 +2035,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2106,15 +2091,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2181,15 +2163,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $127, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2240,15 +2219,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $127, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index a344322..df7aaf6 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -1863,13 +1863,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1898,13 +1895,10 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -1946,13 +1940,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -1981,13 +1972,10 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
+; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2051,15 +2039,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2110,15 +2095,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
@@ -2185,15 +2167,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: xorb $-128, %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2244,15 +2223,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT: xorb $-128, %al
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index 05cd44d..b26bc4c 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -2061,13 +2061,11 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: notl %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2104,18 +2102,37 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
+; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
+; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32>
 %2 = icmp ugt <16 x i16> %a0, %1
 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
@@ -2164,13 +2181,11 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
+; X86-AVX-NEXT: notl %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2207,18 +2222,37 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vmovd %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vmovd %xmm0, %eax
+; X64-AVX1-NEXT: notl %eax
+; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovd %xmm0, %eax
+; X64-AVX2-NEXT: notl %eax
+; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vmovd %xmm0, %eax
+; X64-AVX512-NEXT: notl %eax
+; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32>
 %2 = icmp ugt <32 x i16> %a0, %1
 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
@@ -2264,15 +2298,13 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: notb %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2306,20 +2338,43 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
+; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
+; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32>
 %2 = icmp ugt <32 x i8> %a0, %1
 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
@@ -2368,15 +2423,13 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT: notb %al
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
 ; X86-AVX-NEXT: retl
@@ -2410,20 +2463,43 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT: retq
 ;
-; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
-; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX1: ## %bb.0:
+; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT: notb %al
+; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX2: ## %bb.0:
+; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT: notb %al
+; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX512: ## %bb.0:
+; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT: notb %al
+; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32>
 %2 = icmp ugt <64 x i8> %a0, %1
 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index ed64ec2..5b4d3165 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -1922,12 +1922,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -1964,12 +1959,7 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2019,12 +2009,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vmovd %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2061,12 +2046,7 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovd %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2113,14 +2093,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2154,14 +2129,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
@@ -2211,14 +2181,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX: ## %bb.0:
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT: vzeroupper
@@ -2252,14 +2217,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX: ## %bb.0:
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT: vzeroupper
-- 
2.7.4
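
For reference, the "partial reduction" shape exercised by the tests above is a min/max tree that only covers the low lanes of a wider vector, with only element 0 extracted. A minimal illustrative sketch in LLVM IR follows; it is not taken from the patch, and the function name and the exact shuffle masks are assumptions (the masks in the tests themselves are not shown above). With this change the X86 backend can still match such a tree and lower it to a single vphminposuw.

; Partial umin reduction: only the low 8 lanes of the <16 x i16> argument
; feed the tree, and only element 0 of the final vector is extracted.
define i16 @partial_umin_v16i16(<16 x i16> %a0) {
  ; Fold lanes 4..7 into lanes 0..3.
  %s1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %c1 = icmp ult <16 x i16> %a0, %s1
  %m1 = select <16 x i1> %c1, <16 x i16> %a0, <16 x i16> %s1
  ; Fold lanes 2..3 into lanes 0..1.
  %s2 = shufflevector <16 x i16> %m1, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %c2 = icmp ult <16 x i16> %m1, %s2
  %m2 = select <16 x i1> %c2, <16 x i16> %m1, <16 x i16> %s2
  ; Fold lane 1 into lane 0, then read the scalar result.
  %s3 = shufflevector <16 x i16> %m2, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %c3 = icmp ult <16 x i16> %m2, %s3
  %m3 = select <16 x i1> %c3, <16 x i16> %m2, <16 x i16> %s3
  %r = extractelement <16 x i16> %m3, i32 0
  ret i16 %r
}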