From: Craig Topper
Date: Fri, 3 Nov 2017 22:48:13 +0000 (+0000)
Subject: [X86] Give unary PERMI priority over SHUF128 in lowerV8I64VectorShuffle to make it...
X-Git-Tag: llvmorg-6.0.0-rc1~4163
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d21a53f2463dcae5782eb099d5b9eabb8f223949;p=platform%2Fupstream%2Fllvm.git

[X86] Give unary PERMI priority over SHUF128 in lowerV8I64VectorShuffle to make it possible to fold a load.

llvm-svn: 317382
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d65a65e..ea97dc2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13709,10 +13709,6 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
 
-  if (SDValue Shuf128 =
-          lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
-    return Shuf128;
-
   if (V2.isUndef()) {
     // When the shuffle is mirrored between the 128-bit lanes of the unit, we
     // can use lower latency instructions that will operate on all four
@@ -13734,6 +13730,10 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                        getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
   }
 
+  if (SDValue Shuf128 =
+          lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
+    return Shuf128;
+
   // Try to use shift instructions.
   if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 6c98055..1d17ef1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1165,14 +1165,31 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
 define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
 ; AVX512F-LABEL: shuffle_v8i64_01014545:
 ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_01014545:
 ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT:    retl
+
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_01014545_mem(<8 x i64>* %ptr, <8 x i64> %b) {
+; AVX512F-LABEL: shuffle_v8i64_01014545_mem:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_01014545_mem:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
 ; AVX512F-32-NEXT:    retl
 
+  %a = load <8 x i64>, <8 x i64>* %ptr
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   ret <8 x i64> %shuffle
 }
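
A minimal standalone sketch of the change's effect, modeled on the new shuffle_v8i64_01014545_mem test above. The RUN line, file placement, and function name here are assumptions (the invocation mirrors typical RUN lines in vector-shuffle-512-v8.ll and may need adjusting locally); the expected mnemonic follows the checks added in this patch. The point it illustrates: with the unary PERMI lowering tried before SHUF128, a unary shuffle of a loaded value can take the memory operand directly, whereas the two-source vshufi64x2 form would first need the loaded vector in a register.

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s

; Hypothetical reproducer: the loaded vector feeds a unary lane shuffle,
; so the load should fold into the single-source permute's memory operand.
define <8 x i64> @fold_load_unary_permute(<8 x i64>* %ptr) {
; CHECK-LABEL: fold_load_unary_permute:
; CHECK:       vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
; CHECK-NEXT:  retq
  %v = load <8 x i64>, <8 x i64>* %ptr
  %shuffle = shufflevector <8 x i64> %v, <8 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
  ret <8 x i64> %shuffle
}

On a 32-bit target the pointer argument would additionally be loaded from the stack first, as the AVX512F-32 checks in the patch show.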