From: Nemanja Ivanovic Date: Thu, 2 Jul 2020 15:14:54 +0000 (-0500) Subject: [PowerPC] Remove undefs from splat input when changing shuffle mask X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a701dc5510ab8e977dcb797035475e641d6a353a;p=platform%2Fupstream%2Fllvm.git [PowerPC] Remove undefs from splat input when changing shuffle mask As of 1fed131660b2c5d3ea7007e273a7a5da80699445, we have code that changes shuffle masks so that we can put the shuffle in a canonical form that can be matched to a single instruction. However, it does not properly account for undef elements in the BUILD_VECTOR that is the RHS splat so we can end up with undefs where they shouldn't be. This patch converts the splat input with undefs to one without. --- diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0743a5a..6083a0d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14452,6 +14452,11 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, for (int i = 0, e = Mask.size(); i < e; i += 2) ShuffV[i] = (ShuffV[i + 1] + NumElts); + // If the RHS has undefs, we need to remove them since we may have created + // a shuffle that adds those instead of the splat value. 
+ SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue(); + RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal); + Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); return Res; } diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 6c7923c..53e48b18 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -343,5 +343,37 @@ test_entry: unreachable } +define dso_local <4 x i32> @replace_undefs_in_splat(<4 x i32> %a) local_unnamed_addr #0 { +; CHECK-P8-LABEL: replace_undefs_in_splat: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: vmrgow v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: replace_undefs_in_splat: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vmrgow v2, v3, v2 +; CHECK-P9-NEXT: blr +; +; CHECK-NOVSX-LABEL: replace_undefs_in_splat: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI14_1@toc@ha +; CHECK-NOVSX-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-NOVSX-NEXT: lvx v3, 0, r3 +; CHECK-NOVSX-NEXT: addi r3, r4, .LCPI14_1@toc@l +; CHECK-NOVSX-NEXT: lvx v4, 0, r3 +; CHECK-NOVSX-NEXT: vperm v2, v4, v2, v3 +; CHECK-NOVSX-NEXT: blr +entry: + %vecins1 = shufflevector <4 x i32> %a, <4 x i32> , <4 x i32> + ret <4 x i32> %vecins1 +} + declare double @dummy() local_unnamed_addr attributes #0 = { nounwind }