From: Andrea Di Biagio Date: Wed, 16 Jul 2014 11:29:39 +0000 (+0000) Subject: [X86] Add a check for 'isMOVHLPSMask' within method 'isShuffleMaskLegal'. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a03624d8aba1f363f4920affca0d2d0769a99127;p=platform%2Fupstream%2Fllvm.git [X86] Add a check for 'isMOVHLPSMask' within method 'isShuffleMaskLegal'. Before this change, method 'isShuffleMaskLegal' didn't know that shuffles implementing a 'movhlps' operation were perfectly legal for SSE targets. This patch adds the missing check for 'isMOVHLPSMask' inside method 'isShuffleMaskLegal' to fix the problem. The reason why it is important to do this is because the DAGCombiner conservatively avoids combining a pair of shuffles if the resulting shuffle node has an illegal mask. Before this patch, shuffles with a MOVHLPS mask were wrongly considered not to be legal. This was the root cause of some poor-code generation bugs. llvm-svn: 213137 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c744013..6f876b5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16880,6 +16880,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, return (SVT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, SVT) || + isMOVHLPSMask(M, SVT) || isSHUFPMask(M, SVT) || isPSHUFDMask(M, SVT) || isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) || diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll index d966d7e..c2cff51 100644 --- a/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll +++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-3.ll @@ -35,14 +35,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'shufps, palignr'. ; CHECK-LABEL: test4 ; Mask: [6,7,2,3] -; CHECK: shufps $94 -; CHECK: palignr $8 -; CHECK: ret +; CHECK: movhlps +; CHECK-NEXT: ret define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -90,13 +86,10 @@ define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) { %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> ret <4 x i32> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend thinks that -; shuffle mask [6,7,2,3] is not legal. ; CHECK-LABEL: test9 ; Mask: [6,7,2,3] -; CHECK: shufps $94 -; CHECK: palignr $8 -; CHECK: ret +; CHECK: movhlps +; CHECK-NEXT: ret define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) { %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -144,13 +137,9 @@ define <4 x float> @test14(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'pshufd, palignr'. ; CHECK-LABEL: test14 ; Mask: [6,7,2,3] -; CHECK: pshufd $94 -; CHECK: palignr $8 +; CHECK: movhlps ; CHECK: ret define <4 x float> @test15(<4 x float> %a, <4 x float> %b) { @@ -199,13 +188,9 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) { %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> ret <4 x i32> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'shufps, palignr'. ; CHECK-LABEL: test19 ; Mask: [6,7,2,3] -; CHECK: pshufd $94 -; CHECK: palignr $8 +; CHECK: movhlps ; CHECK: ret define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) { @@ -371,3 +356,30 @@ define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) { ; CHECK: movss ; CHECK: ret +define <4 x i32> @test_movhl_1(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_1 +; CHECK: movhlps +; CHECK-NEXT: ret + +define <4 x i32> @test_movhl_2(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_2 +; CHECK: movhlps +; CHECK-NEXT: ret + +define <4 x i32> @test_movhl_3(<4 x i32> %a, <4 x i32> %b) { + %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> + ret <4 x i32> %2 +} +; CHECK-LABEL: test_movhl_3 +; CHECK: movhlps +; CHECK-NEXT: ret + diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll index 45c624a..6ca4592 100644 --- a/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll +++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-4.ll @@ -38,14 +38,10 @@ define <4 x float> @test4(<4 x float> %a, <4 x float> %b) { %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } -; FIXME: this should be lowered as a single movhlps. However, the backend -; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we -; end up with the sub-optimal sequence 'movhlps, palignr'. ; CHECK-LABEL: test4 ; Mask: [6,7,2,3] ; CHECK: movhlps -; CHECK: palignr $8 -; CHECK: ret +; CHECK-NEXT: ret define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32>