From 3e425c8d199bd9522824c8d648333359385e090a Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Fri, 5 Dec 2014 20:02:22 +0000 Subject: [PATCH] [X86] Improved lowering of packed vector shifts to vpsllq/vpsrlq. SSE2/AVX non-constant packed shift instructions only use the lower 64 bits of the shift count. This patch teaches function 'getTargetVShiftNode' how to deal with shifts where the shift count node is of type MVT::i64. Before this patch, function 'getTargetVShiftNode' only knew how to deal with shift count nodes of type MVT::i32. This forced the backend to wrongly truncate the shift count to MVT::i32, and then zero-extend it back to MVT::i64. llvm-svn: 223505 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++++++---------- llvm/test/CodeGen/X86/lower-vec-shift-2.ll | 8 -------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 62af642..6d5a10f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16713,7 +16713,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG) { - assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); + MVT SVT = ShAmt.getSimpleValueType(); + assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!"); // Catch shift-by-constant. 
if (ConstantSDNode *CShAmt = dyn_cast(ShAmt)) @@ -16728,13 +16729,18 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; } - // Need to build a vector containing shift amount - // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps); + // Need to build a vector containing shift amount. + // SSE/AVX packed shifts only use the lower 64-bit of the shift count. + SmallVector ShOps; + ShOps.push_back(ShAmt); + if (SVT == MVT::i32) { + ShOps.push_back(DAG.getConstant(0, SVT)); + ShOps.push_back(DAG.getUNDEF(SVT)); + } + ShOps.push_back(DAG.getUNDEF(SVT)); + + MVT BVT = SVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64; + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, BVT, ShOps); // The return type has to be a 128-bit type with the same element // type as the input type. 
@@ -18469,8 +18475,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, } if (BaseShAmt.getNode()) { - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!"); + if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt); else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); diff --git a/llvm/test/CodeGen/X86/lower-vec-shift-2.ll b/llvm/test/CodeGen/X86/lower-vec-shift-2.ll index 46023e4..90505b6 100644 --- a/llvm/test/CodeGen/X86/lower-vec-shift-2.ll +++ b/llvm/test/CodeGen/X86/lower-vec-shift-2.ll @@ -44,14 +44,10 @@ entry: define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) { ; SSE2-LABEL: test3: ; SSE2: # BB#0 -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: psllq %xmm1, %xmm0 ; SSE2-NEXT: retq ; AVX-LABEL: test3: ; AVX: # BB#0 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: vmovd %eax, %xmm1 ; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -103,14 +99,10 @@ entry: define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) { ; SSE2-LABEL: test6: ; SSE2: # BB#0 -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: psrlq %xmm1, %xmm0 ; SSE2-NEXT: retq ; AVX-LABEL: test6: ; AVX: # BB#0 -; AVX-NEXT: vmovq %xmm1, %rax -; AVX-NEXT: vmovd %eax, %xmm1 ; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq entry: -- 2.7.4