From 5c683972bc6c5bd29bd25891b9f54d73d47bce10 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Jan 2013 07:41:18 +0000 Subject: [PATCH] Fix 256-bit PALIGNR comment decoding to understand that it works on independent 256-bit lanes. llvm-svn: 173674 --- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 13 +++++++++++-- llvm/test/MC/X86/shuffle-comments.s | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index b490f27..bbd4904 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -66,8 +66,17 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, unsigned NumElts = VT.getVectorNumElements(); unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); - for (unsigned i = 0; i != NumElts; ++i) - ShuffleMask.push_back((i + Offset) % (NumElts * 2)); + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Base = i + Offset; + // If i+Offset is outside this lane then we actually need the other source. + if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; + ShuffleMask.push_back(Base + l); + } + } } /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 
diff --git a/llvm/test/MC/X86/shuffle-comments.s b/llvm/test/MC/X86/shuffle-comments.s index e2f84af..d38fe03 100644 --- a/llvm/test/MC/X86/shuffle-comments.s +++ b/llvm/test/MC/X86/shuffle-comments.s @@ -29,3 +29,18 @@ vpalignr $0, %xmm0, %xmm1, %xmm2 # CHECK: xmm2 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] vpalignr $0, (%rax), %xmm1, %xmm2 # CHECK: xmm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] + +vpalignr $8, %ymm0, %ymm1, %ymm2 +# CHECK: ymm2 = ymm0[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23] +vpalignr $8, (%rax), %ymm1, %ymm2 +# CHECK: ymm2 = mem[8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7],mem[24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23] + +vpalignr $16, %ymm0, %ymm1, %ymm2 +# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] +vpalignr $16, (%rax), %ymm1, %ymm2 +# CHECK: ymm2 = ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] + +vpalignr $0, %ymm0, %ymm1, %ymm2 +# CHECK: ymm2 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] +vpalignr $0, (%rax), %ymm1, %ymm2 +# CHECK: ymm2 = mem[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] -- 2.7.4