From 2f4f8fcf64c67ee327e19b74a82a2330cfb32312 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 1 Mar 2020 00:01:38 -0800 Subject: [PATCH] [X86] Don't add DELETED_NODES to DAG combine worklist after calling SimplifyDemandedBits/SimplifyDemandedVectorElts. These AddToWorklist calls were added in 84cd968f75bbd6e0fbabecc29d2c1090263adec7. It's possible the SimplifyDemandedBits/SimplifyDemandedVectorElts triggered CSE that deleted N. Detect that and avoid adding N to the worklist. Fixes PR45067. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++------ llvm/test/CodeGen/X86/pr45067.ll | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr45067.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 956f57d..4c3fd0d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42212,7 +42212,8 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, if (Mask.getScalarValueSizeInBits() != 1) { APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) { - DCI.AddToWorklist(N); + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } if (SDValue NewMask = @@ -42479,7 +42480,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef, KnownZero, DCI)) { - DCI.AddToWorklist(N); + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } @@ -43828,7 +43830,8 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, unsigned BitWidth = N1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) { - DCI.AddToWorklist(N); + 
if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } @@ -43846,7 +43849,8 @@ static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG, APInt DemandedElts = APInt::getLowBitsSet(8, 4); if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, DCI)) { - DCI.AddToWorklist(N); + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } @@ -44755,7 +44759,8 @@ static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { - DCI.AddToWorklist(N); + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } } @@ -44847,7 +44852,8 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { - DCI.AddToWorklist(N); + if (N->getOpcode() != ISD::DELETED_NODE) + DCI.AddToWorklist(N); return SDValue(N, 0); } } diff --git a/llvm/test/CodeGen/X86/pr45067.ll b/llvm/test/CodeGen/X86/pr45067.ll new file mode 100644 index 0000000..fc1baa3 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr45067.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skylake | FileCheck %s + +@global = external global i32, align 4 + +define void @foo(<8 x i32>* %x, <8 x i1> %y) { +; CHECK-LABEL: foo: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpbroadcastq _global@{{.*}}(%rip), %ymm2 +; CHECK-NEXT: vpgatherqd %xmm1, (,%ymm2), %xmm3 +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; 
CHECK-NEXT: vpslld $31, %ymm0, %ymm0 +; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm3, %ymm1 +; CHECK-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) +; CHECK-NEXT: ud2 + %tmp = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> <i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global, i32* @global>, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) + call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %tmp, <8 x i32>* %x, i32 4, <8 x i1> %y) + unreachable +} + +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>) +declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) -- 2.7.4