From: Uriel Korach Date: Mon, 6 Nov 2017 09:22:38 +0000 (+0000) Subject: [X86][AVX512] Improve lowering of AVX512 test intrinsics X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bb86686a8b6f9433954a535f99f23e11b78ca348;p=platform%2Fupstream%2Fllvm.git [X86][AVX512] Improve lowering of AVX512 test intrinsics Added TESTM and TESTNM to the list of instructions that already zero unused upper bits and therefore do not need the redundant shift-left and shift-right instructions afterwards. Added a pattern for TESTM and TESTNM in ISelLowering, so now icmp(neq,and(X,Y), 0) folds into TESTM and icmp(eq,and(X,Y), 0) folds into TESTNM. This commit is a preparation for lowering the test and testn X86 intrinsics to IR. Differential Revision: https://reviews.llvm.org/D38732 llvm-svn: 317465 --- diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 01d6d0b..0cbf760 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -449,10 +449,10 @@ namespace { // Returns true if this masked compare can be implemented legally with this // type. static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { - if (N->getOpcode() == X86ISD::PCMPEQM || - N->getOpcode() == X86ISD::PCMPGTM || - N->getOpcode() == X86ISD::CMPM || - N->getOpcode() == X86ISD::CMPMU) { + unsigned Opcode = N->getOpcode(); + if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM || + Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM || + Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) { // We can get 256-bit 8 element types here without VLX being enabled. When // this happens we will use 512-bit operations and the mask will not be // zero extended. 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7b88887..85fee29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4998,6 +4998,8 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { switch (Opcode) { default: return false; + case X86ISD::TESTM: + case X86ISD::TESTNM: case X86ISD::PCMPEQM: case X86ISD::PCMPGTM: case X86ISD::CMPM: @@ -17469,6 +17471,20 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); + + // See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM. + if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) { + SDValue A = peekThroughBitcasts(Op0); + if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) && + ISD::isBuildVectorAllZeros(Op1.getNode())) { + MVT VT0 = Op0.getSimpleValueType(); + SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0)); + SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1)); + return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM, + dl, VT, RHS, LHS); + } + } + if (Opc) return DAG.getNode(Opc, dl, VT, Op0, Op1); Opc = Unsigned ? 
X86ISD::CMPMU: X86ISD::CMPM; diff --git a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll index 23d6645..ff25c00 100644 --- a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -46,8 +46,6 @@ define <8 x i1> @test3(<4 x i1> %a) { ; CHECK: # BB#0: ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 -; CHECK-NEXT: kshiftlb $4, %k0, %k0 -; CHECK-NEXT: kshiftrb $4, %k0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512bw-vec-test-testn.ll b/llvm/test/CodeGen/X86/avx512bw-vec-test-testn.ll index 6dd6440..82d0b88 100644 --- a/llvm/test/CodeGen/X86/avx512bw-vec-test-testn.ll +++ b/llvm/test/CodeGen/X86/avx512bw-vec-test-testn.ll @@ -5,9 +5,7 @@ define zeroext i32 @TEST_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_test_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -24,9 +22,7 @@ entry: define zeroext i64 @TEST_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -42,10 +38,8 @@ entry: define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; 
CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -63,10 +57,8 @@ entry: define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovq %rdi, %k1 -; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -84,9 +76,7 @@ entry: define zeroext i32 @TEST_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -103,9 +93,7 @@ entry: define zeroext i64 @TEST_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -121,10 +109,8 @@ entry: define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestnmw %zmm0, 
%zmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -142,10 +128,8 @@ entry: define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovq %rdi, %k1 -; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovq %k0, %rax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll b/llvm/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll index f67ceb2..44075de 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll @@ -5,9 +5,7 @@ define zeroext i16 @TEST_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 +; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq @@ -23,10 +21,8 @@ entry: define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_mask_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq @@ -44,9 +40,7 @@ entry: define zeroext i8 @TEST_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_test_epi16_mask: ; CHECK: # BB#0: # %entry -; 
CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 +; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq @@ -62,10 +56,8 @@ entry: define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_mask_test_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq @@ -83,9 +75,7 @@ entry: define zeroext i16 @TEST_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 +; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq @@ -101,10 +91,8 @@ entry: define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: retq @@ -122,9 +110,7 @@ entry: define zeroext i8 @TEST_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; 
CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 +; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq @@ -140,10 +126,8 @@ entry: define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} +; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: retq @@ -161,9 +145,7 @@ entry: define i32 @TEST_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 +; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -179,10 +161,8 @@ entry: define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -200,9 +180,7 @@ entry: define zeroext i16 @TEST_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_test_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 +; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax 
; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -219,10 +197,8 @@ entry: define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -241,9 +217,7 @@ entry: define i32 @TEST_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 +; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -259,10 +233,8 @@ entry: define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -280,9 +252,7 @@ entry: define zeroext i16 @TEST_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -299,10 +269,8 @@ entry: 
define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} +; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1} ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll b/llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll index c9c0c22..e9cdacc 100644 --- a/llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll +++ b/llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll @@ -5,9 +5,7 @@ define zeroext i8 @TEST_mm512_test_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_test_epi64_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: vzeroupper @@ -23,9 +21,7 @@ entry: define zeroext i16 @TEST_mm512_test_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_test_epi32_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -42,10 +38,8 @@ entry: define zeroext i8 @TEST_mm512_mask_test_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_test_epi64_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: 
vpcmpneqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: vzeroupper @@ -63,10 +57,8 @@ entry: define zeroext i16 @TEST_mm512_mask_test_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_test_epi32_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -85,9 +77,7 @@ entry: define zeroext i8 @TEST_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_testn_epi64_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: vzeroupper @@ -103,9 +93,7 @@ entry: define zeroext i16 @TEST_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_testn_epi32_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper @@ -122,10 +110,8 @@ entry: define zeroext i8 @TEST_mm512_mask_testn_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_testn_epi64_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: 
vptestnmq %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: vzeroupper @@ -143,10 +129,8 @@ entry: define zeroext i16 @TEST_mm512_mask_testn_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; CHECK-LABEL: TEST_mm512_mask_testn_epi32_mask: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index 5ee06fd..26a7d83 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -1217,8 +1217,6 @@ define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1246,8 +1244,6 @@ define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1278,8 +1274,6 @@ define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1311,8 +1305,6 @@ define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13586,8 +13578,6 @@ define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13615,8 +13605,6 @@ define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13647,8 +13635,6 @@ define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13680,8 +13666,6 @@ define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -25987,8 +25971,6 
@@ define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26019,8 +26001,6 @@ define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64> ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26053,8 +26033,6 @@ define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26089,8 +26067,6 @@ define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38587,8 +38563,6 @@ define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38619,8 +38593,6 @@ define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 
x i64> ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38654,8 +38626,6 @@ define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38690,8 +38660,6 @@ define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, < ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-test-testn.ll b/llvm/test/CodeGen/X86/avx512vl-vec-test-testn.ll index f1919cb..32de025 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-test-testn.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-test-testn.ll @@ -6,18 +6,14 @@ define zeroext i8 @TEST_mm_test_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_test_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 +; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_test_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 +; I386-NEXT: vptestmq %xmm0, %xmm1, %k0 ; I386-NEXT: kmovw 
%k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -33,18 +29,14 @@ entry: define zeroext i8 @TEST_mm_test_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_test_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_test_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 +; I386-NEXT: vptestmd %xmm0, %xmm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -61,9 +53,7 @@ entry: define zeroext i8 @TEST_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_test_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 +; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -71,9 +61,7 @@ define zeroext i8 @TEST_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) lo ; ; I386-LABEL: TEST_mm256_test_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 +; I386-NEXT: vptestmq %ymm0, %ymm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -90,9 +78,7 @@ entry: define zeroext i8 @TEST_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_test_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; 
X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 +; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -100,9 +86,7 @@ define zeroext i8 @TEST_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) lo ; ; I386-LABEL: TEST_mm256_test_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 +; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -119,21 +103,17 @@ entry: define zeroext i8 @TEST_mm_mask_test_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_mask_test_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} +; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_mask_test_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} +; I386-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -152,21 +132,17 @@ entry: define zeroext i8 @TEST_mm_mask_test_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_mask_test_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} +; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL 
%AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_mask_test_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} +; I386-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -187,10 +163,8 @@ entry: define zeroext i8 @TEST_mm256_mask_test_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_mask_test_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} +; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -198,11 +172,9 @@ define zeroext i8 @TEST_mm256_mask_test_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x ; ; I386-LABEL: TEST_mm256_mask_test_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} +; I386-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -222,10 +194,8 @@ entry: define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_mask_test_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} +; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ 
-233,11 +203,9 @@ define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x ; ; I386-LABEL: TEST_mm256_mask_test_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} +; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -256,18 +224,14 @@ entry: define zeroext i8 @TEST_mm_testn_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_testn_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_testn_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -283,18 +247,14 @@ entry: define zeroext i8 @TEST_mm_testn_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_testn_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 +; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_testn_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 +; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; 
I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -311,9 +271,7 @@ entry: define zeroext i8 @TEST_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_testn_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 +; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -321,9 +279,7 @@ define zeroext i8 @TEST_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) l ; ; I386-LABEL: TEST_mm256_testn_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 +; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -340,9 +296,7 @@ entry: define zeroext i8 @TEST_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_testn_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 +; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -350,9 +304,7 @@ define zeroext i8 @TEST_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) l ; ; I386-LABEL: TEST_mm256_testn_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 +; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -369,21 +321,17 @@ entry: define zeroext i8 @TEST_mm_mask_testn_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: 
TEST_mm_mask_testn_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} +; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_mask_testn_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} +; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -402,21 +350,17 @@ entry: define zeroext i8 @TEST_mm_mask_testn_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm_mask_testn_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %xmm0, %xmm1, %xmm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} +; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: retq ; ; I386-LABEL: TEST_mm_mask_testn_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %xmm0, %xmm1, %xmm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} +; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: retl @@ -437,10 +381,8 @@ entry: define zeroext i8 @TEST_mm256_mask_testn_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_mask_testn_epi64_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: 
kmovw %edi, %k1 -; X86_64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} +; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -448,11 +390,9 @@ define zeroext i8 @TEST_mm256_mask_testn_epi64_mask(i8 %__U, <4 x i64> %__A, <4 ; ; I386-LABEL: TEST_mm256_mask_testn_epi64_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} +; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper @@ -472,10 +412,8 @@ entry: define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { ; X86_64-LABEL: TEST_mm256_mask_testn_epi32_mask: ; X86_64: # BB#0: # %entry -; X86_64-NEXT: vpand %ymm0, %ymm1, %ymm0 -; X86_64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86_64-NEXT: kmovw %edi, %k1 -; X86_64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} +; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1} ; X86_64-NEXT: kmovw %k0, %eax ; X86_64-NEXT: # kill: %AL %AL %EAX ; X86_64-NEXT: vzeroupper @@ -483,11 +421,9 @@ define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 ; ; I386-LABEL: TEST_mm256_mask_testn_epi32_mask: ; I386: # BB#0: # %entry -; I386-NEXT: vpand %ymm0, %ymm1, %ymm0 -; I386-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; I386-NEXT: kmovw %eax, %k1 -; I386-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} +; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1} ; I386-NEXT: kmovw %k0, %eax ; I386-NEXT: # kill: %AL %AL %EAX ; I386-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/compress_expand.ll b/llvm/test/CodeGen/X86/compress_expand.ll index c6a1c07..9237544 100644 --- a/llvm/test/CodeGen/X86/compress_expand.ll +++ b/llvm/test/CodeGen/X86/compress_expand.ll @@ -140,9 +140,7 @@ 
define void @test7(float* %base, <8 x float> %V, <8 x i1> %mask) { ; KNL-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask) diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 8983c3a..94057f9 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -1057,9 +1057,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { ; SKX: # BB#0: ; SKX-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1068,9 +1066,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { ; SKX_32: # BB#0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) @@ -1105,9 +1101,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { ; SKX: # BB#0: ; SKX-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq 
%xmm2, %xmm2, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper @@ -1117,9 +1111,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { ; SKX_32: # BB#0: ; SKX_32-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX_32-NEXT: vzeroupper @@ -1165,9 +1157,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX: # BB#0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} ; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq @@ -1176,9 +1166,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX_32: # BB#0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} ; SKX_32-NEXT: vmovaps %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index 3e257f5..f43e3f6 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -285,9 +285,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) { ; AVX512F-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq 
$63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq @@ -327,9 +325,7 @@ define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) { ; AVX512F: ## BB#0: ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq @@ -369,9 +365,7 @@ define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) { ; AVX512F: ## BB#0: ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll index 20c77a4..359b8d6 100644 --- a/llvm/test/CodeGen/X86/setcc-lowering.ll +++ b/llvm/test/CodeGen/X86/setcc-lowering.ll @@ -23,10 +23,9 @@ define <8 x i16> @pr25080(<8 x i32> %a) { ; ; KNL-32-LABEL: pr25080: ; KNL-32: # BB#0: # %entry -; KNL-32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607] -; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; KNL-32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; KNL-32-NEXT: vbroadcastss {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607] +; KNL-32-NEXT: vptestnmd %zmm1, %zmm0, %k0 ; 
KNL-32-NEXT: movb $15, %al ; KNL-32-NEXT: kmovw %eax, %k1 ; KNL-32-NEXT: korw %k1, %k0, %k1