From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 4 Mar 2020 15:04:03 +0000 (+0000)
Subject: [X86] Add tests showing failure to combine consecutive loads + FSHL into a single load
X-Git-Tag: 2020.06-alpha~123^2~176
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4c411d2419a699cd927b800dc24021f711b3e0e6;p=platform%2Fupstream%2Fllvm.git

[X86] Add tests showing failure to combine consecutive loads + FSHL into a single load

Similar to some of the regressions seen in D75114
---

diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index 522a03afa59d..6d8ccef45d20 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -524,6 +524,157 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
   ret i64 %tmp
 }
 
+;
+; Combine Consecutive Loads
+;
+
+define i8 @combine_fshl_load_i8(i8* %p) nounwind { ; funnel-shift-left of two adjacent i8 loads
+; X86-LABEL: combine_fshl_load_i8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb 1(%eax), %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_fshl_load_i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movb 1(%rdi), %al
+; X64-NEXT:    retq
+  %p1 = getelementptr i8, i8* %p, i32 1 ; address of the byte following %p
+  %ld0 = load i8, i8 *%p ; low half of the funnel (byte 0)
+  %ld1 = load i8, i8 *%p1 ; high half of the funnel (byte 1)
+  %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8) ; shift amount 8 is taken modulo the i8 width (= 0), so the result is just %ld1
+  ret i8 %res
+}
+
+define i16 @combine_fshl_load_i16(i16* %p) nounwind { ; funnel-shift-left of two adjacent i16 loads by one byte
+; X86-FAST-LABEL: combine_fshl_load_i16:
+; X86-FAST:       # %bb.0:
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT:    movzwl (%eax), %ecx
+; X86-FAST-NEXT:    movzwl 2(%eax), %eax
+; X86-FAST-NEXT:    shldw $8, %cx, %ax
+; X86-FAST-NEXT:    retl
+;
+; X86-SLOW-LABEL: combine_fshl_load_i16:
+; X86-SLOW:       # %bb.0:
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT:    movzwl 2(%ecx), %eax
+; X86-SLOW-NEXT:    movzbl 1(%ecx), %ecx
+; X86-SLOW-NEXT:    shll $8, %eax
+; X86-SLOW-NEXT:    orl %ecx, %eax
+; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-SLOW-NEXT:    retl
+;
+; X64-FAST-LABEL: combine_fshl_load_i16:
+; X64-FAST:       # %bb.0:
+; X64-FAST-NEXT:    movzwl (%rdi), %ecx
+; X64-FAST-NEXT:    movzwl 2(%rdi), %eax
+; X64-FAST-NEXT:    shldw $8, %cx, %ax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: combine_fshl_load_i16:
+; X64-SLOW:       # %bb.0:
+; X64-SLOW-NEXT:    movzwl 2(%rdi), %eax
+; X64-SLOW-NEXT:    movzbl 1(%rdi), %ecx
+; X64-SLOW-NEXT:    shll $8, %eax
+; X64-SLOW-NEXT:    orl %ecx, %eax
+; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT:    retq
+  %p0 = getelementptr i16, i16* %p, i32 0 ; element 0 -> byte offset 0
+  %p1 = getelementptr i16, i16* %p, i32 1 ; element 1 -> byte offset 2
+  %ld0 = load i16, i16 *%p0 ; low half of the funnel
+  %ld1 = load i16, i16 *%p1 ; high half of the funnel
+  %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8) ; (%ld1 << 8) | (%ld0 >> 8): bytes 1..2 of %p; every expected output above still uses two loads
+  ret i16 %res
+}
+
+define i32 @combine_fshl_load_i32(i32* %p) nounwind { ; funnel-shift-left of two adjacent i32 loads by one byte
+; X86-FAST-LABEL: combine_fshl_load_i32:
+; X86-FAST:       # %bb.0:
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT:    movl 8(%eax), %ecx
+; X86-FAST-NEXT:    movl 12(%eax), %eax
+; X86-FAST-NEXT:    shldl $8, %ecx, %eax
+; X86-FAST-NEXT:    retl
+;
+; X86-SLOW-LABEL: combine_fshl_load_i32:
+; X86-SLOW:       # %bb.0:
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT:    movl 11(%eax), %eax
+; X86-SLOW-NEXT:    retl
+;
+; X64-FAST-LABEL: combine_fshl_load_i32:
+; X64-FAST:       # %bb.0:
+; X64-FAST-NEXT:    movl 8(%rdi), %ecx
+; X64-FAST-NEXT:    movl 12(%rdi), %eax
+; X64-FAST-NEXT:    shldl $8, %ecx, %eax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: combine_fshl_load_i32:
+; X64-SLOW:       # %bb.0:
+; X64-SLOW-NEXT:    movl 11(%rdi), %eax
+; X64-SLOW-NEXT:    retq
+  %p0 = getelementptr i32, i32* %p, i32 2 ; element 2 -> byte offset 8
+  %p1 = getelementptr i32, i32* %p, i32 3 ; element 3 -> byte offset 12
+  %ld0 = load i32, i32 *%p0 ; low half of the funnel
+  %ld1 = load i32, i32 *%p1 ; high half of the funnel
+  %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8) ; (%ld1 << 8) | (%ld0 >> 24): same bytes as one i32 load at offset 11; only two of the four expected outputs above fold to that
+  ret i32 %res
+}
+
+define i64 @combine_fshl_load_i64(i64* %p) nounwind { ; funnel-shift-left of two adjacent i64 loads by three bytes
+; X86-FAST-LABEL: combine_fshl_load_i64:
+; X86-FAST:       # %bb.0:
+; X86-FAST-NEXT:    pushl %esi
+; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT:    movl 12(%ecx), %eax
+; X86-FAST-NEXT:    movl 16(%ecx), %esi
+; X86-FAST-NEXT:    movl 20(%ecx), %edx
+; X86-FAST-NEXT:    shldl $24, %esi, %edx
+; X86-FAST-NEXT:    shrdl $8, %esi, %eax
+; X86-FAST-NEXT:    popl %esi
+; X86-FAST-NEXT:    retl
+;
+; X86-SLOW-LABEL: combine_fshl_load_i64:
+; X86-SLOW:       # %bb.0:
+; X86-SLOW-NEXT:    pushl %esi
+; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT:    movl 20(%eax), %edx
+; X86-SLOW-NEXT:    movl 12(%eax), %ecx
+; X86-SLOW-NEXT:    movl 16(%eax), %esi
+; X86-SLOW-NEXT:    shrl $8, %ecx
+; X86-SLOW-NEXT:    movl %esi, %eax
+; X86-SLOW-NEXT:    shll $24, %eax
+; X86-SLOW-NEXT:    orl %ecx, %eax
+; X86-SLOW-NEXT:    shrl $8, %esi
+; X86-SLOW-NEXT:    shll $24, %edx
+; X86-SLOW-NEXT:    orl %esi, %edx
+; X86-SLOW-NEXT:    popl %esi
+; X86-SLOW-NEXT:    retl
+;
+; X64-FAST-LABEL: combine_fshl_load_i64:
+; X64-FAST:       # %bb.0:
+; X64-FAST-NEXT:    movq 8(%rdi), %rcx
+; X64-FAST-NEXT:    movq 16(%rdi), %rax
+; X64-FAST-NEXT:    shldq $24, %rcx, %rax
+; X64-FAST-NEXT:    retq
+;
+; X64-SLOW-LABEL: combine_fshl_load_i64:
+; X64-SLOW:       # %bb.0:
+; X64-SLOW-NEXT:    movq 8(%rdi), %rcx
+; X64-SLOW-NEXT:    movq 16(%rdi), %rax
+; X64-SLOW-NEXT:    shrq $40, %rcx
+; X64-SLOW-NEXT:    shlq $24, %rax
+; X64-SLOW-NEXT:    orq %rcx, %rax
+; X64-SLOW-NEXT:    retq
+  %p0 = getelementptr i64, i64* %p, i64 1 ; element 1 -> byte offset 8
+  %p1 = getelementptr i64, i64* %p, i64 2 ; element 2 -> byte offset 16
+  %ld0 = load i64, i64 *%p0 ; low half of the funnel
+  %ld1 = load i64, i64 *%p1 ; high half of the funnel
+  %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24) ; (%ld1 << 24) | (%ld0 >> 40): same bytes as one i64 load at offset 13; none of the expected outputs above fold to that
+  ret i64 %res
+}
+
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}