From cc86e6b0a88be33a8b74b5f0431424909dec9feb Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Tue, 21 Mar 2023 17:33:54 +0800 Subject: [PATCH] [BugFix] Fix VSELECT ISel fail Reviewed By: Luo yuanke Differential Revision: https://reviews.llvm.org/D146683 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++++++++----- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2 ++ llvm/test/CodeGen/X86/vselect-post-combine.ll | 24 ++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/vselect-post-combine.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cc722bc..20d7447 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12210,7 +12210,8 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG) { + SelectionDAG &DAG, + CombineLevel Level) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -12235,10 +12236,14 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, else if (Opcode == ISD::ZERO_EXTEND) ExtLoadOpcode = ISD::ZEXTLOAD; + // Creating an illegal VSELECT after legalization (DAG Combine2) can make + // ISel fail, so conservatively check the OperationAction first. 
LoadSDNode *Load1 = cast<LoadSDNode>(Op1); LoadSDNode *Load2 = cast<LoadSDNode>(Op2); if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || - !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) + !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) || + (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes && + TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal)) return SDValue(); SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); @@ -13106,7 +13111,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13457,7 +13462,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue V = widenAbs(N, DAG)) return V; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13618,7 +13623,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5e90a94..dfac249 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1039,6 +1039,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); + assert(N->getValueType(0).getVectorElementType() != MVT::i16 && + "We can't replace VSELECT with BLENDV in vXi16!"); SDValue Blendv = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), N->getOperand(2)); diff --git 
a/llvm/test/CodeGen/X86/vselect-post-combine.ll b/llvm/test/CodeGen/X86/vselect-post-combine.ll new file mode 100644 index 0000000..fdbc361 --- /dev/null +++ b/llvm/test/CodeGen/X86/vselect-post-combine.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 + +define ptr @test_mul(ptr %addr) { +; AVX2-LABEL: test_mul: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [255,0,0,0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpblendvb %xmm0, (%rdi), %xmm1, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: vmovdqu %ymm0, 0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +entry: + %vec0 = load <32 x i8>, ptr %addr + %vec1 = shufflevector <32 x i8> %vec0, <32 x i8> , <32 x i32> + %0 = bitcast <32 x i8> %vec1 to <4 x i64> + %shuffle = shufflevector <4 x i64> %0, <4 x i64> zeroinitializer, <2 x i32> + %1 = bitcast <2 x i64> %shuffle to <16 x i8> + %conv = zext <16 x i8> %1 to <16 x i16> + store <16 x i16> %conv, ptr null, align 1 + ret ptr null +} -- 2.7.4