From 42ddd71120e445748a8d992e12297560590b3ca4 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Wed, 29 Jul 2015 14:31:57 +0000 Subject: [PATCH] [PPC] Fix PR24216: Don't generate splat for misaligned shuffle mask Given certain shuffle-vector masks, LLVM emits splat instructions which splat the wrong bytes from the source register. The issue is that the function PPC::isSplatShuffleMask() in PPCISelLowering.cpp does not ensure that the splat pattern found is requesting bytes that are aligned on an EltSize boundary. This patch detects this situation as not a valid splat mask, resulting in a permute being generated instead of a splat. Patch and test case by Tyler Kenney, cleaned up a bit by me. This is a simple bug fix that would be good to incorporate into 3.7. llvm-svn: 243519 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++ llvm/test/CodeGen/PowerPC/pr24216.ll | 14 ++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pr24216.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e7bc903..94171a6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1430,6 +1430,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); + // The consecutive indices need to specify an element, not part of two + // different elements. So abandon ship early if this isn't the case. + if (N->getMaskElt(0) % EltSize != 0) + return false; + // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. 
unsigned ElementBase = N->getMaskElt(0); diff --git a/llvm/test/CodeGen/PowerPC/pr24216.ll b/llvm/test/CodeGen/PowerPC/pr24216.ll new file mode 100644 index 0000000..4ab4198 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr24216.ll @@ -0,0 +1,14 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; Test case adapted from PR24216. + +define void @foo(<16 x i8>* nocapture readonly %in, <16 x i8>* nocapture %out) { +entry: + %0 = load <16 x i8>, <16 x i8>* %in, align 16 + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5> + store <16 x i8> %1, <16 x i8>* %out, align 16 + ret void +} + +; CHECK: vperm +; CHECK-NOT: vspltw -- 2.7.4