From c2e4405475f49f72f357459d56a20b5a471f4261 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 21 Mar 2019 18:32:38 +0000 Subject: [PATCH] [X86] canonicalizeBitSelect - don't attempt to canonicalize mask registers We don't use X86ISD::ANDNP for mask registers. Test case from @craig.topper (Craig Topper) llvm-svn: 356696 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/combine-bitselect.ll | 82 ++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 875fe4f..db56df6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37266,7 +37266,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); EVT VT = N->getValueType(0); - if (!VT.isVector()) + if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) return SDValue(); SDValue N0 = peekThroughBitcasts(N->getOperand(0)); diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index 3518c93..973f5c2 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -591,3 +591,85 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n %7 = or <8 x i64> %6, %5 ret <8 x i64> %7 } + +; Check that mask registers don't get canonicalized. 
+define void @bitselect_v4i1_loop(<4 x i32> %a0) {
+; SSE-LABEL: bitselect_v4i1_loop:
+; SSE:       # %bb.0: # %bb
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pandn %xmm0, %xmm3
+; SSE-NEXT:    .p2align 4, 0x90
+; SSE-NEXT:  .LBB12_1: # %bb1
+; SSE-NEXT:    # =>This Inner Loop Header: Depth=1
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    jmp .LBB12_1
+;
+; XOP-LABEL: bitselect_v4i1_loop:
+; XOP:       # %bb.0: # %bb
+; XOP-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT:    vpcomneqd %xmm1, %xmm0, %xmm1
+; XOP-NEXT:    vpcomeqd {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vmovdqa %xmm1, %xmm2
+; XOP-NEXT:    .p2align 4, 0x90
+; XOP-NEXT:  .LBB12_1: # %bb1
+; XOP-NEXT:    # =>This Inner Loop Header: Depth=1
+; XOP-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm2
+; XOP-NEXT:    jmp .LBB12_1
+;
+; AVX1-LABEL: bitselect_v4i1_loop:
+; AVX1:       # %bb.0: # %bb
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    .p2align 4, 0x90
+; AVX1-NEXT:  .LBB12_1: # %bb1
+; AVX1-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX1-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    jmp .LBB12_1
+;
+; AVX2-LABEL: bitselect_v4i1_loop:
+; AVX2:       # %bb.0: # %bb
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [12,12,12,12]
+; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    .p2align 4, 0x90
+; AVX2-NEXT:  .LBB12_1: # %bb1
+; AVX2-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm2
+; AVX2-NEXT:    jmp .LBB12_1
+;
+; AVX512F-LABEL: bitselect_v4i1_loop:
+; AVX512F:       # %bb.0: # %bb
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k2
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k2}
+; AVX512F-NEXT:    .p2align 4, 0x90
+; AVX512F-NEXT:  .LBB12_1: # %bb1
+; AVX512F-NEXT:    # =>This Inner Loop Header: Depth=1
+; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    korw %k1, %k0, %k1
+; AVX512F-NEXT:    jmp .LBB12_1
+bb:
+  %tmp = icmp ne <4 x i32> %a0, zeroinitializer
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi <4 x i1> [ %tmp, %bb ], [ %tmp4, %bb1 ]
+  %tmp3 = icmp eq <4 x i32> %a0, <i32 12, i32 12, i32 12, i32 12>
+  %tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp3, <4 x i1> %tmp2
+  %tmp5 = and <4 x i1> %tmp4, %tmp
+  br label %bb1
+}
-- 
2.7.4