From d2533d96e14118a86904a69fd2a192c008642f14 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 29 Jun 2020 11:37:03 -0500 Subject: [PATCH] [PowerPC] Fix crash for shuffle canonicalization with elt 0 from RHS Commit 1fed131660b2 assumed that shuffle vector canonicalization will always ensure that the shuffle mask will be ordered so that element zero comes from the LHS vector. However there is code out there for which this is not the case. This patch simply removes that unsafe assumption and makes the code work regardless of the source of the first element. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 +++-- .../CodeGen/PowerPC/canonical-merge-shuffles.ll | 54 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f300fab..0d60c5e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14365,10 +14365,16 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, // Adjust the mask so we are pulling in the same index from the splat // as the index from the interesting vector in consecutive elements. 
- // Example: + // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero> - for (int i = 1, e = Mask.size(); i < e; i += 2) - ShuffV[i] = (ShuffV[i - 1] + NumElts); + if (Mask[0] < NumElts) + for (int i = 1, e = Mask.size(); i < e; i += 2) + ShuffV[i] = (ShuffV[i - 1] + NumElts); + // Example (odd elements from first vector): + // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero> + else + for (int i = 0, e = Mask.size(); i < e; i += 2) + ShuffV[i] = (ShuffV[i + 1] + NumElts); Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); return Res; diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 445f0c6..6c7923c 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -290,4 +290,58 @@ entry: ret <8 x i16> %1 } +define dso_local void @no_crash_elt0_from_RHS(<2 x double>* noalias nocapture dereferenceable(16) %.vtx6) #0 { +; CHECK-P8-LABEL: no_crash_elt0_from_RHS: +; CHECK-P8: # %bb.0: # %test_entry +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: mr r30, r3 +; CHECK-P8-NEXT: bl dummy +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: xxlxor f0, f0, f0 +; CHECK-P8-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-P8-NEXT: xxmrghd vs0, vs1, vs0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 +; +; CHECK-P9-LABEL: no_crash_elt0_from_RHS: +; CHECK-P9: # %bb.0: # %test_entry +; CHECK-P9-NEXT: mflr r0 +; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: std r0, 16(r1) +; CHECK-P9-NEXT: stdu r1, -48(r1) +; CHECK-P9-NEXT: mr r30, r3 +; CHECK-P9-NEXT: bl dummy +; CHECK-P9-NEXT: nop +; CHECK-P9-NEXT: xxlxor f0, f0, f0 +; CHECK-P9-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-P9-NEXT: xxmrghd 
vs0, vs1, vs0 +; CHECK-P9-NEXT: stxv vs0, 0(r30) +; +; CHECK-NOVSX-LABEL: no_crash_elt0_from_RHS: +; CHECK-NOVSX: # %bb.0: # %test_entry +; CHECK-NOVSX-NEXT: mflr r0 +; CHECK-NOVSX-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NOVSX-NEXT: std r0, 16(r1) +; CHECK-NOVSX-NEXT: stdu r1, -48(r1) +; CHECK-NOVSX-NEXT: mr r30, r3 +; CHECK-NOVSX-NEXT: bl dummy +; CHECK-NOVSX-NEXT: nop +; CHECK-NOVSX-NEXT: li r3, 0 +; CHECK-NOVSX-NEXT: stfd f1, 8(r30) +; CHECK-NOVSX-NEXT: std r3, 0(r30) +test_entry: + %_div_result = tail call double @dummy() + %oldret = insertvalue { double, double } undef, double %_div_result, 0 + %0 = extractvalue { double, double } %oldret, 0 + %.splatinsert = insertelement <2 x double> undef, double %0, i32 0 + %.splat = shufflevector <2 x double> %.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer + %1 = shufflevector <2 x double> zeroinitializer, <2 x double> %.splat, <2 x i32> <i32 0, i32 3> + store <2 x double> %1, <2 x double>* %.vtx6, align 16 + unreachable +} + +declare double @dummy() local_unnamed_addr attributes #0 = { nounwind } -- 2.7.4