From 4b2be144e170504f62362e044a3c4ad3b24da880 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Tue, 4 Apr 2023 00:14:02 -0500 Subject: [PATCH] [X86] Prefer `vpternlog` instead of `blendv` for `vselect` on masks. This rarely comes up because most `vselect` are lowered with actually avx512 mask instructions, but is an improvement in the rare cases. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D145221 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 21 +- llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll | 52 ++--- llvm/test/CodeGen/X86/var-permute-256.ll | 30 +-- llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 4 +- llvm/test/CodeGen/X86/vselect-avx.ll | 2 +- llvm/test/CodeGen/X86/vselect-pcmp.ll | 283 +++++++++++++++++------- 6 files changed, 260 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e068912..7ff35a3 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1034,18 +1034,27 @@ void X86DAGToDAGISel::PreprocessISelDAG() { break; } case ISD::VSELECT: { - // Replace VSELECT with non-mask conditions with with BLENDV. - if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1) + // Replace VSELECT with non-mask conditions with with BLENDV/VPTERNLOG. + EVT EleVT = N->getOperand(0).getValueType().getVectorElementType(); + if (EleVT == MVT::i1) break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); assert(N->getValueType(0).getVectorElementType() != MVT::i16 && "We can't replace VSELECT with BLENDV in vXi16!"); - SDValue Blendv = - CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), - N->getOperand(0), N->getOperand(1), N->getOperand(2)); + SDValue R; + if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) == + EleVT.getSizeInBits()) { + R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), N->getOperand(2), + CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8)); + } else { + R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), + N->getOperand(2)); + } --I; - CurDAG->ReplaceAllUsesWith(N, Blendv.getNode()); + CurDAG->ReplaceAllUsesWith(N, R.getNode()); ++I; MadeChange = true; continue; diff --git a/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll index 83b30d99..bafa33f 100644 --- a/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll @@ -35,7 +35,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> ; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] ; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd] ; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc] -; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50] +; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8] ; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86NOBW-NEXT: retl # encoding: [0xc3] @@ -49,7 +49,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineinvqb_128(<16 x i8> ; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: 
[0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] ; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd] ; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc] -; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50] +; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8] ; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64NOBW-NEXT: retq # encoding: [0xc3] @@ -100,7 +100,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> ; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed] ; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01] ; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc] -; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50] +; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8] ; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86NOBW-NEXT: retl # encoding: [0xc3] ; @@ -118,7 +118,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineinvqb_256(<32 x i8> ; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed] ; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01] ; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc] -; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50] +; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8] ; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64NOBW-NEXT: retq # encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> @@ -252,7 +252,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s ; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] ; X86NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd] ; X86NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc] -; X86NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50] +; X86NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8] ; X86NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86NOBW-NEXT: retl # encoding: [0xc3] @@ -266,7 +266,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_vgf2p8affineqb_128(<16 x i8> %s ; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] ; X64NOBW-NEXT: vpmovdb %zmm1, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xcd] ; X64NOBW-NEXT: vpand %xmm4, %xmm5, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xd1,0xdb,0xcc] -; X64NOBW-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm2 # encoding: [0xc4,0xe3,0x69,0x4c,0xd0,0x50] +; X64NOBW-NEXT: vpternlogq $184, %xmm0, %xmm5, %xmm2 # encoding: [0x62,0xf3,0xd5,0x08,0x25,0xd0,0xb8] ; X64NOBW-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO 
VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64NOBW-NEXT: retq # encoding: [0xc3] @@ -317,7 +317,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s ; X86NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed] ; X86NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01] ; X86NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc] -; X86NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50] +; X86NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8] ; X86NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86NOBW-NEXT: retl # encoding: [0xc3] ; @@ -335,7 +335,7 @@ define { <32 x i8>, <32 x i8>, <32 x i8> } @test_vgf2p8affineqb_256(<32 x i8> %s ; X64NOBW-NEXT: vpmovdb %zmm5, %xmm5 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xed] ; X64NOBW-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm5 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xed,0x01] ; X64NOBW-NEXT: vpand %ymm4, %ymm5, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xd5,0xdb,0xcc] -; X64NOBW-NEXT: vpblendvb %ymm5, %ymm0, %ymm2, %ymm2 # encoding: [0xc4,0xe3,0x6d,0x4c,0xd0,0x50] +; X64NOBW-NEXT: vpternlogq $184, %ymm0, %ymm5, %ymm2 # encoding: [0x62,0xf3,0xd5,0x28,0x25,0xd0,0xb8] ; X64NOBW-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64NOBW-NEXT: retq # encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> @@ -466,20 +466,20 @@ define <16 x i8> @test_vgf2p8mulb_128_mask(<16 x i8> %src1, <16 x i8> %src2, <16 ; X86NOBW-LABEL: test_vgf2p8mulb_128_mask: ; X86NOBW: # %bb.0: ; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1] -; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] -; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9] -; X86NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10] +; X86NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9] +; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff] +; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] +; X86NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca] ; X86NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86NOBW-NEXT: retl # encoding: [0xc3] ; ; X64NOBW-LABEL: test_vgf2p8mulb_128_mask: ; X64NOBW: # %bb.0: ; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] -; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc1] -; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] -; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9] -; X64NOBW-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 # encoding: [0xc4,0xe3,0x69,0x4c,0xc0,0x10] +; X64NOBW-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xc9] +; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: 
[0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff] +; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] +; X64NOBW-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0xca] ; X64NOBW-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64NOBW-NEXT: retq # encoding: [0xc3] %1 = bitcast i16 %mask to <16 x i1> @@ -555,13 +555,13 @@ define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 ; X86NOBW: # %bb.0: ; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] ; X86NOBW-NEXT: kmovw {{[0-9]+}}(%esp), %k2 # encoding: [0xc5,0xf8,0x90,0x54,0x24,0x06] -; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1] -; X86NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] -; X86NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9] +; X86NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9] +; X86NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff] +; X86NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] ; X86NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff] ; X86NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb] -; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01] -; X86NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10] +; X86NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01] +; X86NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca] ; X86NOBW-NEXT: retl # encoding: [0xc3] ; ; X64NOBW-LABEL: test_vgf2p8mulb_256_mask: @@ -569,13 +569,13 @@ define <32 x i8> @test_vgf2p8mulb_256_mask(<32 x i8> %src1, <32 x i8> %src2, <32 ; X64NOBW-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64NOBW-NEXT: shrl $16, %edi # encoding: [0xc1,0xef,0x10] ; X64NOBW-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7] -; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc1] -; X64NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc9,0xff] -; X64NOBW-NEXT: vpmovdb %zmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc9] +; X64NOBW-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xc9] +; X64NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff] +; X64NOBW-NEXT: vpmovdb %zmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0] ; X64NOBW-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} # encoding: [0x62,0xf3,0x65,0xca,0x25,0xdb,0xff] ; X64NOBW-NEXT: vpmovdb %zmm3, %xmm3 # encoding: [0x62,0xf2,0x7e,0x48,0x31,0xdb] -; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xcb,0x01] -; X64NOBW-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 # encoding: [0xc4,0xe3,0x6d,0x4c,0xc0,0x10] +; X64NOBW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc3,0x01] +; X64NOBW-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0xca] ; X64NOBW-NEXT: retq # 
encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2) diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll index 6af3126..ea337ef 100644 --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -194,12 +194,12 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] +; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16: @@ -313,9 +313,9 @@ define <32 x i8> @var_shuffle_v32i8(<32 x i8> %v, <32 x i8> %indices) nounwind { ; AVX512VLDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v32i8: @@ -739,11 +739,11 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm2, %ymm3, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; AVX512VLBW-LABEL: var_shuffle_v16i16_from_v8i16: @@ -857,9 +857,9 @@ define <32 x i8> @var_shuffle_v32i8_from_v16i8(<16 x i8> %v, <32 x i8> %indices) ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm0 -; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLDQ-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm3 +; AVX512VLDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0 +; AVX512VLDQ-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512VLDQ-NEXT: retq ; ; 
AVX512VLBW-LABEL: var_shuffle_v32i8_from_v16i8: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index cb36c8a..ec0bddc 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -361,7 +361,7 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 ; AVX512VL-NEXT: retq ; ; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8: @@ -459,7 +459,7 @@ define <32 x i8> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm3, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i8_split: diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll index 0fc0035..cf25020 100644 --- a/llvm/test/CodeGen/X86/vselect-avx.ll +++ b/llvm/test/CodeGen/X86/vselect-avx.ll @@ -134,7 +134,7 @@ define void @test3(<4 x i32> %induction30, ptr %tmp16, ptr %tmp17, <4 x i16> %t ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; AVX512-NEXT: vpmovdw %ymm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vpternlogq $226, %xmm2, %xmm0, %xmm1 ; AVX512-NEXT: vmovq %xmm0, (%rdi) ; AVX512-NEXT: vmovq %xmm1, (%rsi) ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vselect-pcmp.ll b/llvm/test/CodeGen/X86/vselect-pcmp.ll index 2f2ca9a..d89c4ab 100644 --- a/llvm/test/CodeGen/X86/vselect-pcmp.ll +++ b/llvm/test/CodeGen/X86/vselect-pcmp.ll @@ -23,12 +23,26 @@ define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) ; Sorry 16-bit, you're not important enough to support? 
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) { -; AVX-LABEL: signbit_sel_v8i16: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 -; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX-NEXT: retq +; AVX12-LABEL: signbit_sel_v8i16: +; AVX12: # %bb.0: +; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX12-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 +; AVX12-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 +; AVX512VL-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: signbit_sel_v8i16: ; XOP: # %bb.0: @@ -238,12 +252,19 @@ define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: signbit_sel_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: signbit_sel_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: signbit_sel_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: signbit_sel_v16i16: ; XOP: # %bb.0: @@ -581,13 +602,21 @@ define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, < ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: blend_splat1_mask_cond_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_splat1_mask_cond_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splat1_mask_cond_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splat1_mask_cond_v16i16: ; XOP: # %bb.0: @@ -612,13 +641,21 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 ; AVX12-NEXT: retq ; -; AVX512-LABEL: blend_splat1_mask_cond_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: 
blend_splat1_mask_cond_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splat1_mask_cond_v16i8: ; XOP: # %bb.0: @@ -709,13 +746,21 @@ define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 ; AVX12-NEXT: retq ; -; AVX512-LABEL: blend_splatmax_mask_cond_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_splatmax_mask_cond_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splatmax_mask_cond_v8i16: ; XOP: # %bb.0: @@ -748,13 +793,21 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: blend_splatmax_mask_cond_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splatmax_mask_cond_v32i8: ; XOP: # %bb.0: @@ -878,13 +931,21 @@ define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <1 ; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: blend_splat_mask_cond_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_splat_mask_cond_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splat_mask_cond_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splat_mask_cond_v16i16: ; XOP: # %bb.0: @@ -909,13 +970,21 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x ; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 ; AVX12-NEXT: retq ; -; AVX512-LABEL: blend_splat_mask_cond_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_splat_mask_cond_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_splat_mask_cond_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_splat_mask_cond_v16i8: ; XOP: # %bb.0: @@ -1017,13 +1086,29 @@ define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z } define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { -; AVX-LABEL: blend_mask_cond_v8i16: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX12-LABEL: blend_mask_cond_v8i16: +; AVX12: # %bb.0: +; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX12-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: blend_mask_cond_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_mask_cond_v8i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_mask_cond_v8i16: ; XOP: # %bb.0: @@ -1039,13 +1124,29 @@ define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z } define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { -; AVX-LABEL: blend_mask_cond_v16i8: -; AVX: # %bb.0: -; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 -; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 -; AVX-NEXT: retq +; 
AVX12-LABEL: blend_mask_cond_v16i8: +; AVX12: # %bb.0: +; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: blend_mask_cond_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_mask_cond_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 +; AVX512VL-NEXT: vpternlogq $202, %xmm2, %xmm1, %xmm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_mask_cond_v16i8: ; XOP: # %bb.0: @@ -1176,13 +1277,21 @@ define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i1 ; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: blend_mask_cond_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_mask_cond_v16i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_mask_cond_v16i16: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_mask_cond_v16i16: ; XOP: # %bb.0: @@ -1223,13 +1332,21 @@ define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z ; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: blend_mask_cond_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: blend_mask_cond_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: blend_mask_cond_v32i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0 +; AVX512VL-NEXT: retq ; ; XOP-LABEL: blend_mask_cond_v32i8: ; XOP: # %bb.0: @@ -1306,3 +1423,5 @@ define void @PR46531(ptr %x, ptr %y, ptr %z) { } attributes #0 = { "no-nans-fp-math"="true" } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} -- 2.7.4
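A note on the `vpternlog` immediates above, since they read as magic numbers: `vpternlogd`/`vpternlogq` take an 8-bit truth table that is evaluated bitwise, indexed by `(op1 << 2) | (op2 << 1) | op3`, where op1 is also the destination. The `0xCA` used in the X86ISelDAGToDAG.cpp hunk is therefore the table for `op1 ? op2 : op3`, i.e. a bitwise select with the all-ones/all-zeros condition in the first operand; the `$184` and `$226` forms in the test updates are the same select with operands commuted so a different source register can be reused as the destination (presumably picked up by the usual ternlog commutation handling). The sketch below is not part of the patch and the helper name is invented; it only recomputes those three constants to make the encoding concrete.

#include <cstdint>
#include <cstdio>

// imm8 for vpternlog: bit ((op1 << 2) | (op2 << 1) | op3) of the immediate
// holds the result for that combination of input bits.
static uint8_t ternlogImm(bool (*F)(bool, bool, bool)) {
  uint8_t Imm = 0;
  for (int Idx = 0; Idx < 8; ++Idx)
    if (F(Idx & 4, Idx & 2, Idx & 1))
      Imm |= 1 << Idx;
  return Imm;
}

int main() {
  // op1 = cond, op2 = true, op3 = false: the 0xCA emitted by the new lowering.
  printf("0x%02X\n", (unsigned)ternlogImm(
      [](bool Op1, bool Op2, bool Op3) { return Op1 ? Op2 : Op3; }));
  // op2 = cond, op3 = true, op1 = false: the $184 seen in the test diffs.
  printf("%u\n", (unsigned)ternlogImm(
      [](bool Op1, bool Op2, bool Op3) { return Op2 ? Op3 : Op1; }));
  // op2 = cond, op1 = true, op3 = false: the $226 seen in the test diffs.
  printf("%u\n", (unsigned)ternlogImm(
      [](bool Op1, bool Op2, bool Op3) { return Op2 ? Op1 : Op3; }));
  return 0;
}

As for why the swap is a win when the condition is already a full per-element sign mask: `vpblendvb` has no EVEX encoding, so it cannot address `xmm16`-`xmm31` or fold AVX512 masking, and it typically costs more uops than the single-uop `vpternlog` on recent Intel cores.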