From cc9909b881b04d496b70ff44854d99d06e584dfc Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Mon, 4 Aug 2014 13:53:40 +0000 Subject: [PATCH] [PowerPC] Swap arguments to vpkuhum/vpkuwum on little-endian In commit r213915, Bill fixed little-endian usage of vmrgh* and vmrgl* by swapping the input arguments. As it turns out, the exact same fix is also required for the vpkuhum/vpkuwum patterns. This fixes another regression in llvmpipe when vector support is enabled. Reviewed by Bill Schmidt. llvm-svn: 214718 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 70 ++++++++++++++++++----------- llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 +- llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 30 +++++++++---- llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll | 8 +++- 4 files changed, 74 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index bd74a09..4e95e96 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -852,14 +852,26 @@ static bool isConstantOrUndef(int Op, int Val) { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. -bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 
0 : 1; - if (!isUnary) { + if (ShuffleKind == 0) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+j)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; + for (unsigned i = 0; i != 16; ++i) + if (!isConstantOrUndef(N->getMaskElt(i), i*2)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) @@ -870,27 +882,33 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, +/// The ShuffleKind distinguishes between big-endian operations with +/// two different inputs (0), either-endian operations with two identical +/// inputs (1), and little-endian operations with two different inputs (2). +/// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - unsigned j, k; - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { - j = 0; - k = 1; - } else { - j = 2; - k = 3; - } - if (!isUnary) { + if (ShuffleKind == 0) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+k)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; - } else { + } else if (ShuffleKind == 2) { + if (!DAG.getTarget().getDataLayout()->isLittleEndian()) + return false; + for (unsigned i = 0; i != 16; i += 2) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) + return false; + } else if (ShuffleKind == 1) { + unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || - !isConstantOrUndef(N->getMaskElt(i+9), i*2+k)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; @@ -6044,8 +6062,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || - PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) || - PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) || + PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || @@ -6061,8 +6079,8 
@@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // and produce a fixed permutation. If any of these match, do not lower to // VPERM. unsigned int ShuffleKind = isLittleEndian ? 2 : 0; - if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) || - PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) || + if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index b8b917e..b4f8550 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -301,12 +301,12 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. 
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 33d3a7e..bf585f3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -22,25 +22,31 @@ def vnot_ppc : PatFrag<(ops node:$in), def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, - *CurDAG); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, - *CurDAG); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG); }]>; def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, - *CurDAG); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, - *CurDAG); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG); }]>; +// These fragments are provided for little-endian, where the inputs must be +// swapped for correct semantics. 
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; +def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG); +}]>; def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ @@ -797,6 +803,14 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), (VPKUHUM $vA, $vA)>; +// Match vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands. +// These fragments are matched for little-endian, where the +// inputs must be swapped for correct semantics. +def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUWUM $vB, $vA)>; +def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB), + (VPKUHUM $vB, $vA)>; + // Match vmrg*(x,x) def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), (VMRGLB $vA, $vA)>; diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll index 3ab4cc9..4fd41a7 100644 --- a/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll +++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll @@ -6,7 +6,9 @@ entry: %tmp = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> -; CHECK: vpkuhum +; CHECK: lvx [[REG1:[0-9]+]] +; CHECK: lvx [[REG2:[0-9]+]] +; CHECK: vpkuhum [[REG3:[0-9]+]], [[REG2]], [[REG1]] store <16 x i8> %tmp3, <16 x i8>* %A ret void } @@ -27,7 +29,9 @@ entry: %tmp = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> -; CHECK: vpkuwum +; CHECK: lvx [[REG1:[0-9]+]] +; CHECK: lvx [[REG2:[0-9]+]] +; CHECK: vpkuwum [[REG3:[0-9]+]], [[REG2]], [[REG1]] store <16 x i8> %tmp3, <16 x i8>* %A ret void } -- 2.7.4