From: Simon Pilgrim Date: Sat, 19 Mar 2022 16:31:15 +0000 (+0000) Subject: [X86] combineSelect - don't constant fold BLENDV nodes like VSELECT X-Git-Tag: upstream/15.0.7~13027 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a6c18bfbe39dc494d18fb8e66b16d0e292ed25e4;p=platform%2Fupstream%2Fllvm.git [X86] combineSelect - don't constant fold BLENDV nodes like VSELECT If an X86ISD::BLENDV op appears before legalization (in this test case due to the icmp_slt x, 0), its constant mask was being treated as a vselect mask (mask != 0) instead of a blendv mask (mask < 0). This just prevents constant folding entirely for non-VSELECT ops. --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 20e9a21..96000ec 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43904,7 +43904,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return V; // Convert vselects with constant condition into shuffles. - if (CondConstantVector && DCI.isBeforeLegalizeOps()) { + if (CondConstantVector && DCI.isBeforeLegalizeOps() && + N->getOpcode() == ISD::VSELECT) { SmallVector<int, 64> Mask; if (createShuffleMaskFromVSELECT(Mask, Cond)) return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); diff --git a/llvm/test/CodeGen/X86/avx-select.ll b/llvm/test/CodeGen/X86/avx-select.ll index ada4942..1ed71c3 100644 --- a/llvm/test/CodeGen/X86/avx-select.ll +++ b/llvm/test/CodeGen/X86/avx-select.ll @@ -58,20 +58,26 @@ define <4 x i64> @select01(i32 %a, <4 x i64> %b) nounwind { ret <4 x i64> %res } -; FIXME: If a X86ISD::BLENDV node appears before legalization, its been constant folded like a vselect (mask != 0) instead of (mask < 0) +; FIXME: If a X86ISD::BLENDV node appears before legalization, constant fold using (mask < 0) instead of like a vselect (mask != 0). 
define void @fold_blendv_mask(<4 x i32> %a0) { ; X86-LABEL: fold_blendv_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vmovaps {{.*#+}} ymm0 = [26146,4294966039,4294967294,4294964244,29361,4294951202,4294964216,4294941010] -; X86-NEXT: vmovaps %ymm0, (%eax) -; X86-NEXT: vzeroupper +; X86-NEXT: vmovaps {{.*#+}} xmm0 = [44158,54560,45291,18686] +; X86-NEXT: vmovaps {{.*#+}} xmm1 = [4294942349,7802,29242,15858] +; X86-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm0 +; X86-NEXT: vmovaps {{.*#+}} xmm1 = [29361,4294951202,4294964216,4294941010] +; X86-NEXT: vmovaps %xmm1, (%eax) +; X86-NEXT: vmovaps %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: fold_blendv_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vmovaps {{.*#+}} ymm0 = [26146,4294966039,4294967294,4294964244,29361,4294951202,4294964216,4294941010] -; X64-NEXT: vmovaps %ymm0, (%rax) -; X64-NEXT: vzeroupper +; X64-NEXT: vmovaps {{.*#+}} xmm0 = [44158,54560,45291,18686] +; X64-NEXT: vmovaps {{.*#+}} xmm1 = [4294942349,7802,29242,15858] +; X64-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 +; X64-NEXT: vmovaps {{.*#+}} xmm1 = [29361,4294951202,4294964216,4294941010] +; X64-NEXT: vmovaps %xmm1, (%rax) +; X64-NEXT: vmovaps %xmm0, (%rax) ; X64-NEXT: retq entry: br label %head