From 481e50efe092e7dc4527a54a802121404bb7a58c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 7 Dec 2012 19:01:13 +0000 Subject: [PATCH] X86: Prefer using VPSHUFD over VPERMIL because it has better throughput. llvm-svn: 169624 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 2 +- llvm/test/CodeGen/X86/avx-shuffle.ll | 4 ++-- llvm/test/CodeGen/X86/avx-splat.ll | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e59f619..d48d722 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6781,12 +6781,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { unsigned TargetMask = getShuffleSHUFImmediate(SVOp); - if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64)) - return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG); - if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); + if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64)) + return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, + DAG); + return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1, TargetMask, DAG); } diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index fa90ae7..0be83f6 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2348,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) { - ; CHECK: vpermilps + ; CHECK: vpshufd %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll index 904f048..65685a3 100644 --- a/llvm/test/CodeGen/X86/avx-shuffle.ll +++ b/llvm/test/CodeGen/X86/avx-shuffle.ll @@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind { ret <4 x float> %b ; CHECK: test1: ; CHECK: vshufps -; CHECK: vpermilps +; CHECK: vpshufd } ; rdar://10538417 @@ -106,7 +106,7 @@ define <4 x float> @test11(<4 x float> %a) nounwind { define <4 x float> @test12(<4 x float>* %a) nounwind { ; CHECK: test12 -; CHECK: vpermilps $27, ( +; CHECK: vpshufd %tmp0 = load <4 x float>* %a %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> ret <4 x float> %tmp1 diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll index 5ad7523..67e4b40 100644 --- a/llvm/test/CodeGen/X86/avx-splat.ll +++ b/llvm/test/CodeGen/X86/avx-splat.ll @@ -84,7 +84,7 @@ define <8 x float> @funcF(i32 %val) nounwind { ret <8 x float> %tmp } -; CHECK: vpermilps $0 +; CHECK: vpshufd $0 ; CHECK-NEXT: vinsertf128 $1 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { entry: @@ -93,7 +93,7 @@ entry: } ; CHECK: vextractf128 $1 -; CHECK-NEXT: vpermilps $85 +; CHECK-NEXT: vpshufd ; CHECK-NEXT: vinsertf128 $1 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { entry: -- 2.7.4