From 510feca1b86530f4c48fb69180a612cdb47fcaf2 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 13 Mar 2015 17:29:49 +0000 Subject: [PATCH] [X86][AVX] Fix wrong lowering of v4x64 shuffles into concat_vector plus extract_subvector nodes. This patch fixes a bug in the shuffle lowering logic implemented by function 'lowerV2X128VectorShuffle'. There are a few cases where function 'lowerV2X128VectorShuffle' wrongly expands a shuffle of two v4X64 vectors into a CONCAT_VECTORS of two EXTRACT_SUBVECTOR nodes. The problematic expansion only occurs when the shuffle mask M has an 'undef' element at position 2, and M is equivalent to mask <0,1,4,5>. In that case, the algorithm propagates the wrong vector to one of the two new EXTRACT_SUBVECTOR nodes. Example: ;; define <4 x double> @test(<4 x double> %A, <4 x double> %B) { entry: %0 = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> ret <4 x double> %0 } ;; Before this patch, llc (-mattr=+avx) generated: vinsertf128 $1, %xmm0, %ymm0, %ymm0 With this patch, llc correctly generates: vinsertf128 $1, %xmm1, %ymm0, %ymm0 Added test lower-vec-shuffle-bug.ll Differential Revision: http://reviews.llvm.org/D8259 llvm-svn: 232179 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++-- llvm/test/CodeGen/X86/lower-vec-shuffle-bug.ll | 41 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/X86/lower-vec-shuffle-bug.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0533afc..167685a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9021,12 +9021,12 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, VT.getVectorNumElements() / 2); // Check for patterns which can be matched with a single insert of a 128-bit // subvector. 
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}) || - isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); + if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, DAG.getIntPtrConstant(0)); SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0)); + OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 6, 7})) { diff --git a/llvm/test/CodeGen/X86/lower-vec-shuffle-bug.ll b/llvm/test/CodeGen/X86/lower-vec-shuffle-bug.ll new file mode 100644 index 0000000..5918e80 --- /dev/null +++ b/llvm/test/CodeGen/X86/lower-vec-shuffle-bug.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s + +define <4 x double> @test1(<4 x double> %A, <4 x double> %B) { +; CHECK-LABEL: test1: +; CHECK: # BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %0 = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> + ret <4 x double> %0 +} + +define <4 x double> @test2(<4 x double> %A, <4 x double> %B) { +; CHECK-LABEL: test2: +; CHECK: # BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %0 = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> + ret <4 x double> %0 +} + +define <4 x double> @test3(<4 x double> %A, <4 x double> %B) { +; CHECK-LABEL: test3: +; CHECK: # BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %0 = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> + ret <4 x double> %0 +} + +define <4 x double> @test4(<4 x double> %A, <4 x double> %B) { +; CHECK-LABEL: test4: +; CHECK: # BB#0: +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; CHECK-NEXT: retq +entry: + %0 = shufflevector <4 x double> %A, <4 x double> 
%B, <4 x i32> + ret <4 x double> %0 +} -- 2.7.4