From f33cb61471c0feb31aa97f52273ec5c3799b5ac9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 Mar 2016 21:55:01 +0000 Subject: [PATCH] [X86][AVX512BW] Fixed 512-bit PSHUFB shuffle mask decode and added combine test. The PSHUFB decoder was assuming that the input was a 128-bit or 256-bit vector only. llvm-svn: 262661 --- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 6 +++--- .../test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index e2e3f8c..5c99926 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -314,9 +314,9 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, ShuffleMask.push_back(M); continue; } - // For AVX vectors with 32 bytes the base of the shuffle is the half of - // the vector we're inside. - int Base = i < 16 ? 0 : 16; + // For 256/512-bit vectors the base of the shuffle is the 128-bit + // subvector we're inside. + int Base = (i / 16) * 16; // If the high bit (7) of the byte is set, the element is zeroed. 
if (M & (1 << 7)) ShuffleMask.push_back(SM_SentinelZero); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll new file mode 100644 index 0000000..f0b4080 --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw | FileCheck %s + +declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8> @combine_pshufb_identity(<64 x i8> %x0) { +; CHECK-LABEL: combine_pshufb_identity: +; CHECK: # BB#0: +; CHECK-NEXT: retq + %select = bitcast <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> to <64 x i8> + %mask = bitcast <16 x i32> <i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051, i32 202182159, i32 134810123, i32 67438087, i32 66051> to <64 x i8> + %res0 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %mask, <64 x i8> %select, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %res0, <64 x i8> %mask, <64 x i8> %select, i64 -1) + ret <64 x i8> %res1 +} -- 2.7.4