N0.getOpcode() == X86ISD::VSRAI)
return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
+ // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+ if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
+ N1 == N0.getOperand(1)) {
+ SDValue N00 = N0.getOperand(0);
+ unsigned NumSignBits = DAG.ComputeNumSignBits(N00);
+ if (ShiftVal.ult(NumSignBits))
+ return N00;
+ }
+
// We can decode 'whole byte' logical bit shifts as shuffles.
if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_ashr:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_ashr:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: pslld $16, %xmm3
-; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: pslld $16, %xmm2
-; SSE2-NEXT: psrad $16, %xmm2
-; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc16i32_16i16_ashr:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: psrad $16, %xmm0
-; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm1
+; SSSE3-NEXT: psrad $16, %xmm2
+; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: pslld $16, %xmm0
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: pslld $16, %xmm3
-; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: pslld $16, %xmm2
-; SSSE3-NEXT: psrad $16, %xmm2
-; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: movdqu %xmm2, (%rax)
; SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSSE3-NEXT: retq