[X86] Promote i16 SRA instructions to i32

We already promote SRL and SHL to i32.
This will sometimes introduce sign extends, which might be harder to deal with than the zero extends we use when promoting SRL. I ran this through some of our internal benchmark lists and didn't see any major regressions.
I think there might be some DAG combine improvement opportunities in the test changes here.
Differential Revision: https://reviews.llvm.org/D60278
llvm-svn: 357743
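An illustration of the tradeoff (written for this note, not code from the commit): the extension chosen when widening an i16 shift to i32 determines which bits a right shift pulls down, so promoted SRA needs movswl where promoted SRL only needs movzwl.

#include <cassert>
#include <cstdint>

// Illustrative model, not code from this commit: an i16 right shift done in
// i32 is only correct if the operand was widened with the matching extension,
// because the extension's high bits are exactly what gets shifted in.
static uint16_t sra16_via_i32(uint16_t x, unsigned amt) {
  int32_t wide = (int16_t)x;        // sign extend, i.e. movswl
  return (uint16_t)(wide >> amt);   // 32-bit arithmetic shift, i.e. sarl
}

static uint16_t srl16_via_i32(uint16_t x, unsigned amt) {
  uint32_t wide = x;                // zero extend, i.e. movzwl
  return (uint16_t)(wide >> amt);   // 32-bit logical shift, i.e. shrl
}

int main() {
  // 0x8000 shifted right by 3: arithmetically 0xF000, logically 0x1000.
  assert(sra16_via_i32(0x8000, 3) == 0xF000);
  assert(srl16_via_i32(0x8000, 3) == 0x1000);
}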
In X86TargetLowering::isTypeDesirableForOp:

case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:

In X86TargetLowering::IsDesirableToPromoteOp:

case ISD::ANY_EXTEND:
  break;
case ISD::SHL:
+ case ISD::SRA:
case ISD::SRL: {
  SDValue N0 = Op.getOperand(0);
  // Look out for (store (shl (load), x)).
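Where the extensions come from: once these hooks accept an i16 shift, DAGCombiner::PromoteIntShiftOp widens the shifted operand, using a sign extend for SRA (SExtPromoteOperand), a zero extend for SRL (ZExtPromoteOperand), and an any_extend for SHL. A quick illustration (not LLVM code) of why SHL alone tolerates arbitrary high bits:

#include <cassert>
#include <cstdint>

// Illustration, not LLVM code: the low 16 bits of a left shift never depend
// on the operand's high bits, so SHL can be promoted with any_extend, while
// a right shift pulls the high bits down and needs the matching extension.
static uint16_t shl16_via_i32(uint16_t x, unsigned amt, uint16_t junk) {
  uint32_t wide = ((uint32_t)junk << 16) | x; // "any extend": junk high half
  return (uint16_t)(wide << amt);             // shll; only low 16 bits used
}

int main() {
  for (uint32_t x = 0; x <= 0xFFFF; x += 257)
    for (unsigned amt = 0; amt < 16; ++amt)
      assert(shl16_via_i32((uint16_t)x, amt, 0xDEAD) ==
             (uint16_t)((uint16_t)x << amt));
}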
define i64 @fun8(i16 zeroext %v) {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sarw $4, %di
-; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: movswl %di, %eax
+; CHECK-NEXT: shrl $4, %eax
+; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: shlq $4, %rax
; CHECK-NEXT: retq
entry:
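The fun8 change relies on an equality the DAG already exploits: once the operand is sign extended, a logical 32-bit shift followed by the existing movzwl yields the same low 16 bits as the old 16-bit arithmetic shift, because bits 16..19 of the widened value are sign copies either way. A quick exhaustive check (illustrative C++, written for this note):

#include <cassert>
#include <cstdint>

// Illustrative: the old sarw $4 + movzwl sequence versus the new
// movswl + shrl $4 + movzwl sequence agree on every i16 input.
static uint16_t old_seq(uint16_t v) {
  int16_t s = (int16_t)v;
  s = (int16_t)(s >> 4);                      // sarw $4
  return (uint16_t)s;                         // movzwl
}

static uint16_t new_seq(uint16_t v) {
  uint32_t w = (uint32_t)(int32_t)(int16_t)v; // movswl
  w >>= 4;                                    // shrl $4 (logical is enough)
  return (uint16_t)w;                         // movzwl %ax, %eax
}

int main() {
  for (uint32_t v = 0; v <= 0xFFFF; ++v)
    assert(old_seq((uint16_t)v) == new_seq((uint16_t)v));
}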
define i16 @test_i16(i16 %a) nounwind {
; X86-NO-CMOV-LABEL: test_i16:
; X86-NO-CMOV: # %bb.0:
-; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: movl %eax, %ecx
-; X86-NO-CMOV-NEXT: sarw $15, %cx
+; X86-NO-CMOV-NEXT: sarl $15, %ecx
; X86-NO-CMOV-NEXT: addl %ecx, %eax
; X86-NO-CMOV-NEXT: xorl %ecx, %eax
; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
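test_i16 is the branchless abs idiom; with the shift promoted, the sign mask comes from sarl $15 of the sign-extended value instead of sarw $15 on a 16-bit register. The idiom in C++ (illustrative, not the test's code):

#include <cassert>
#include <cstdint>

// Illustrative: branchless abs, the pattern behind test_i16. The mask m is
// 0 for non-negative x and all ones for negative x; add-then-xor negates
// exactly when x is negative.
static int16_t abs16(int16_t x) {
  int32_t v = x;            // movswl
  int32_t m = v >> 15;      // sarl $15: 0 or -1
  return (int16_t)((v + m) ^ m);
}

int main() {
  // Skip -32768, which has no representable i16 absolute value.
  for (int x = -32767; x <= 32767; ++x)
    assert(abs16((int16_t)x) == (x < 0 ? -x : x));
}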
; SSE-LABEL: ashr_op0_constant:
; SSE: # %bb.0:
; SSE-NEXT: movb (%rdi), %cl
-; SSE-NEXT: movw $-42, %ax
-; SSE-NEXT: sarw %cl, %ax
+; SSE-NEXT: movl $-42, %eax
+; SSE-NEXT: sarl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ashr_op0_constant:
; AVX: # %bb.0:
; AVX-NEXT: movb (%rdi), %cl
-; AVX-NEXT: movw $-42, %ax
-; AVX-NEXT: sarw %cl, %ax
+; AVX-NEXT: movl $-42, %eax
+; AVX-NEXT: sarl %cl, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i16, i16* %p
define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
; SSE-LABEL: ashr_op1_constant:
; SSE: # %bb.0:
-; SSE-NEXT: movzwl (%rdi), %eax
-; SSE-NEXT: sarw $7, %ax
+; SSE-NEXT: movswl (%rdi), %eax
+; SSE-NEXT: sarl $7, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ashr_op1_constant:
; AVX: # %bb.0:
-; AVX-NEXT: movzwl (%rdi), %eax
-; AVX-NEXT: sarw $7, %ax
+; AVX-NEXT: movswl (%rdi), %eax
+; AVX-NEXT: sarl $7, %eax
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i16, i16* %p
; SSE-NEXT: shrl $16, %ecx
; SSE-NEXT: addl %eax, %ecx
; SSE-NEXT: movzwl %cx, %eax
-; SSE-NEXT: sarw $5, %cx
+; SSE-NEXT: movswl %ax, %ecx
; SSE-NEXT: shrl $15, %eax
-; SSE-NEXT: addl %ecx, %eax
-; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: sarl $5, %ecx
+; SSE-NEXT: addl %eax, %ecx
+; SSE-NEXT: movd %ecx, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_op1_constant:
; AVX-NEXT: shrl $16, %ecx
; AVX-NEXT: addl %eax, %ecx
; AVX-NEXT: movzwl %cx, %eax
-; AVX-NEXT: sarw $5, %cx
+; AVX-NEXT: movswl %ax, %ecx
; AVX-NEXT: shrl $15, %eax
-; AVX-NEXT: addl %ecx, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
+; AVX-NEXT: sarl $5, %ecx
+; AVX-NEXT: addl %eax, %ecx
+; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: retq
%x = load i16, i16* %p
%b = sdiv i16 %x, 42
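The churn in sdiv_op1_constant is the usual divide-by-magic-constant expansion: sarl $5 is the quotient shift (now done at 32 bits after a movswl), and the shrl $15 / addl pair adds the sign bit so negative quotients round toward zero. A self-contained model, with 49933 = ceil(2^21 / 42) computed for this note rather than taken from the test:

#include <cassert>
#include <cstdint>

// Illustrative model of the lowering, not the commit's code: multiply by a
// magic reciprocal, arithmetic-shift the result, then add the sign bit so
// truncation rounds toward zero. Assumes arithmetic >> on negative ints.
static int16_t sdiv42(int16_t x) {
  int32_t t = ((int32_t)x * 49933) >> 16; // multiply, keep high half
  int16_t q = (int16_t)((int16_t)t >> 5); // quotient shift (movswl + sarl $5)
  q += (uint16_t)t >> 15;                 // add sign bit (shrl $15 + addl)
  return q;
}

int main() {
  for (int x = -32768; x <= 32767; ++x)
    assert(sdiv42((int16_t)x) == (int16_t)(x / 42));
}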
; CHECK-NEXT: movzwl (%rcx), %eax
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: shll $12, %edx
-; CHECK-NEXT: sarw $12, %dx
+; CHECK-NEXT: movswl %dx, %edx
+; CHECK-NEXT: shrl $12, %edx
; CHECK-NEXT: movq _b@{{.*}}(%rip), %rsi
; CHECK-NEXT: orw (%rsi), %dx
; CHECK-NEXT: movl (%rcx), %ecx
; CHECK-NEXT: shll $12, %ecx
-; CHECK-NEXT: sarw $12, %cx
+; CHECK-NEXT: movswl %cx, %ecx
+; CHECK-NEXT: shrl $12, %ecx
; CHECK-NEXT: andl %edx, %ecx
; CHECK-NEXT: movw %cx, (%rsi)
; CHECK-NEXT: retq
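The shll $12 / sarw $12 pair rewritten above is sign_extend_inreg from a 4-bit field; in the promoted form the movswl supplies the arithmetic behavior and a logical shrl suffices, since only the low 16 bits reach the following orw and andl. An exhaustive check of the equivalence (illustrative C++, written for this note):

#include <cassert>
#include <cstdint>

// Illustrative: sign-extending a 4-bit field via 16-bit shifts versus the
// promoted 32-bit sequence; the low 16 bits agree on every input.
static uint16_t sext4_old(uint16_t x) {
  return (uint16_t)((int16_t)(x << 12) >> 12);  // shll $12 + sarw $12
}

static uint16_t sext4_new(uint16_t x) {
  uint32_t w = (uint32_t)x << 12;               // shll $12
  w = (uint32_t)(int32_t)(int16_t)w;            // movswl: sign extend low half
  return (uint16_t)(w >> 12);                   // shrl $12; low 16 bits used
}

int main() {
  for (uint32_t x = 0; x <= 0xFFFF; ++x)
    assert(sext4_old((uint16_t)x) == sext4_new((uint16_t)x));
}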
; X64-NEXT: sarq $63, %rax
; X64-NEXT: cmpq $.Lslh_ret_addr23, %rcx
; X64-NEXT: cmovneq %r15, %rax
-; X64-NEXT: movzwl (%rbx), %ecx
-; X64-NEXT: sarw $7, %cx
-; X64-NEXT: movzwl %cx, %edi
+; X64-NEXT: movswl (%rbx), %edi
+; X64-NEXT: shrl $7, %edi
; X64-NEXT: notl %edi
+; X64-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
; X64-NEXT: orl %eax, %edi
; X64-NEXT: shlq $47, %rax
; X64-NEXT: orq %rax, %rsp
; X64-LFENCE-NEXT: movl (%rbx), %edi
; X64-LFENCE-NEXT: shll $7, %edi
; X64-LFENCE-NEXT: callq sink
-; X64-LFENCE-NEXT: movzwl (%rbx), %eax
-; X64-LFENCE-NEXT: sarw $7, %ax
-; X64-LFENCE-NEXT: movzwl %ax, %edi
+; X64-LFENCE-NEXT: movswl (%rbx), %edi
+; X64-LFENCE-NEXT: shrl $7, %edi
; X64-LFENCE-NEXT: notl %edi
+; X64-LFENCE-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
; X64-LFENCE-NEXT: callq sink
; X64-LFENCE-NEXT: movzwl (%rbx), %eax
; X64-LFENCE-NEXT: rolw $9, %ax
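A note on the extra orl $-65536 in both hardened sequences above: notl followed by that or computes ~v | 0xFFFF0000, which equals ~(v & 0xFFFF), a not of just the low half. The old movzwl-based sequence got the masking for free, so this looks like one of the DAG combine improvement opportunities the message mentions. Checking the identity (illustrative):

#include <cassert>
#include <cstdint>

// Illustrative identity check: or-ing ones into the high half after a
// 32-bit not is the same as masking to 16 bits before the not.
int main() {
  for (uint64_t v = 0; v <= 0xFFFFFFFFu; v += 0x01000193) // sampled values
    assert((~(uint32_t)v | 0xFFFF0000u) == ~((uint32_t)v & 0xFFFFu));
}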