From 8d76fbb5f0659bba47a7f38847550a2bc8bbf0c1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 17 Oct 2022 10:12:24 -0400 Subject: [PATCH] [VectorCombine] fix crashing on match of non-canonical fneg We can't assume that operand 0 is the negated operand because the matcher handles "fsub -0.0, X" (and also +0.0 with FMF). By capturing the extract within the match, we avoid the bug and make the transform more robust (can't assume that this pass will only see canonical IR). --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 +++++-- .../VectorCombine/X86/extract-fneg-insert.ll | 28 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 7046859..a21add2 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -550,10 +550,15 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) { m_ConstantInt(Index)))) return false; + // Note: This handles the canonical fneg instruction and "fsub -0.0, X". Value *SrcVec; - if (!match(FNeg, m_FNeg(m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))) + Instruction *Extract; + if (!match(FNeg, m_FNeg(m_CombineAnd( + m_Instruction(Extract), + m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index)))))) return false; + // TODO: We could handle this with a length-changing shuffle. if (SrcVec->getType() != VecTy) return false; @@ -577,7 +582,6 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) { // If the extract has one use, it will be eliminated, so count it in the // original cost. If it has more than one use, ignore the cost because it will // be the same before/after. - Instruction *Extract = cast(FNeg->getOperand(0)); if (Extract->hasOneUse()) OldCost += TTI.getVectorInstrCost(*Extract, VecTy, Index); diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll index 0abccb1..1e95cbd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll @@ -154,3 +154,31 @@ define <4 x float> @ext12_v4f32(<4 x float> %x, <4 x float> %y) { %r = insertelement <4 x float> %y, float %n, i32 12 ret <4 x float> %r } + +; This used to crash because we assumed matching a true, unary fneg instruction. + +define <2 x float> @ext1_v2f32_fsub(<2 x float> %x) { +; CHECK-LABEL: @ext1_v2f32_fsub( +; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x float> [[R]] +; + %e = extractelement <2 x float> %x, i32 1 + %s = fsub float -0.0, %e + %r = insertelement <2 x float> %x, float %s, i32 1 + ret <2 x float> %r +} + +; This used to crash because we assumed matching a true, unary fneg instruction. + +define <2 x float> @ext1_v2f32_fsub_fmf(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @ext1_v2f32_fsub_fmf( +; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan nsz <2 x float> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x float> [[R]] +; + %e = extractelement <2 x float> %x, i32 1 + %s = fsub nsz nnan float 0.0, %e + %r = insertelement <2 x float> %y, float %s, i32 1 + ret <2 x float> %r +} -- 2.7.4