From a8dd6f4f309b55c43a5b263c3aa178fa5bca4b30 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 30 Sep 2017 17:57:34 +0000 Subject: [PATCH] [X86][SSE] Fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 Remove the sign-extend-in-register style shift pair (a left shift followed by an arithmetic right shift by the same amount) when the input already has more sign bits than the shift amount, since the pair is then a no-op. llvm-svn: 314599 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++++++++ llvm/test/CodeGen/X86/vector-trunc.ll | 28 ++++------------------------ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b927178..2d7cf5c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31845,6 +31845,15 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, N0.getOpcode() == X86ISD::VSRAI) return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1); + // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 + if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI && + N1 == N0.getOperand(1)) { + SDValue N00 = N0.getOperand(0); + unsigned NumSignBits = DAG.ComputeNumSignBits(N00); + if (ShiftVal.ult(NumSignBits)) + return N00; + } + // We can decode 'whole byte' logical bit shifts as shuffles. 
if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) { SDValue Op(N, 0); diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index 5548ee1..bb8fd19 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -389,11 +389,7 @@ entry: define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) { ; SSE2-LABEL: trunc8i32_8i16_ashr: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm1 ; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm0 ; SSE2-NEXT: psrad $16, %xmm0 ; SSE2-NEXT: packssdw %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -727,40 +723,24 @@ entry: define void @trunc16i32_16i16_ashr(<16 x i32> %a) { ; SSE2-LABEL: trunc16i32_16i16_ashr: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: psrad $16, %xmm2 ; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 ; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pslld $16, %xmm0 ; SSE2-NEXT: psrad $16, %xmm0 ; SSE2-NEXT: packssdw %xmm1, %xmm0 -; SSE2-NEXT: pslld $16, %xmm3 -; SSE2-NEXT: psrad $16, %xmm3 -; SSE2-NEXT: pslld $16, %xmm2 -; SSE2-NEXT: psrad $16, %xmm2 -; SSE2-NEXT: packssdw %xmm3, %xmm2 ; SSE2-NEXT: movdqu %xmm2, (%rax) ; SSE2-NEXT: movdqu %xmm0, (%rax) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: trunc16i32_16i16_ashr: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: psrad $16, %xmm2 ; SSSE3-NEXT: psrad $16, %xmm3 -; SSSE3-NEXT: psrad $16, %xmm0 -; SSSE3-NEXT: psrad $16, %xmm1 -; SSSE3-NEXT: pslld $16, %xmm1 +; SSSE3-NEXT: psrad $16, %xmm2 +; SSSE3-NEXT: packssdw %xmm3, %xmm2 ; SSSE3-NEXT: psrad $16, %xmm1 -; SSSE3-NEXT: pslld $16, %xmm0 ; SSSE3-NEXT: psrad $16, %xmm0 ; SSSE3-NEXT: packssdw %xmm1, %xmm0 -; SSSE3-NEXT: pslld $16, %xmm3 -; SSSE3-NEXT: psrad $16, %xmm3 -; SSSE3-NEXT: pslld $16, %xmm2 -; SSSE3-NEXT: psrad $16, %xmm2 -; SSSE3-NEXT: packssdw %xmm3, %xmm2 ; 
SSSE3-NEXT: movdqu %xmm2, (%rax) ; SSSE3-NEXT: movdqu %xmm0, (%rax) ; SSSE3-NEXT: retq -- 2.7.4