From: Simon Pilgrim Date: Wed, 4 Mar 2020 15:04:03 +0000 (+0000) Subject: [X86] Add tests showing failure to combine consecutive loads + FSHL into a single... X-Git-Tag: 2020.06-alpha~123^2~176 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4c411d2419a699cd927b800dc24021f711b3e0e6;p=platform%2Fupstream%2Fllvm.git [X86] Add tests showing failure to combine consecutive loads + FSHL into a single load Similar to some of the regressions seen in D75114 --- diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index 522a03afa59d..6d8ccef45d20 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -524,6 +524,157 @@ define i64 @const_shift_i64(i64 %x, i64 %y) nounwind { ret i64 %tmp } +; Combine Consecutive Loads +; Each test funnel-shifts (llvm.fshl, constant whole-byte amount) two loads of adjacent elements of %p, a pattern intended to fold into a single load. +; Per the CHECK lines below only i8 and the i32 X86-SLOW/X64-SLOW groups emit one load (movb 1(..), movl 11(..)); every other prefix group still issues several loads followed by shld/shrd or a shift/or sequence. + +define i8 @combine_fshl_load_i8(i8* %p) nounwind { +; X86-LABEL: combine_fshl_load_i8: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb 1(%eax), %al +; X86-NEXT: retl +; +; X64-LABEL: combine_fshl_load_i8: +; X64: # %bb.0: +; X64-NEXT: movb 1(%rdi), %al +; X64-NEXT: retq + %p1 = getelementptr i8, i8* %p, i32 1 + %ld0 = load i8, i8 *%p + %ld1 = load i8, i8 *%p1 + %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8) + ret i8 %res +} + +define i16 @combine_fshl_load_i16(i16* %p) nounwind { +; X86-FAST-LABEL: combine_fshl_load_i16: +; X86-FAST: # %bb.0: +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-NEXT: movzwl (%eax), %ecx +; X86-FAST-NEXT: movzwl 2(%eax), %eax +; X86-FAST-NEXT: shldw $8, %cx, %ax +; X86-FAST-NEXT: retl +; +; X86-SLOW-LABEL: combine_fshl_load_i16: +; X86-SLOW: # %bb.0: +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movzwl 2(%ecx), %eax +; X86-SLOW-NEXT: movzbl 1(%ecx), %ecx +; X86-SLOW-NEXT: shll $8, %eax +; X86-SLOW-NEXT: orl %ecx, %eax +; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax +; X86-SLOW-NEXT: retl +; +; X64-FAST-LABEL: combine_fshl_load_i16: +; X64-FAST: # %bb.0: +; X64-FAST-NEXT: movzwl (%rdi), %ecx +; 
X64-FAST-NEXT: movzwl 2(%rdi), %eax +; X64-FAST-NEXT: shldw $8, %cx, %ax +; X64-FAST-NEXT: retq +; +; X64-SLOW-LABEL: combine_fshl_load_i16: +; X64-SLOW: # %bb.0: +; X64-SLOW-NEXT: movzwl 2(%rdi), %eax +; X64-SLOW-NEXT: movzbl 1(%rdi), %ecx +; X64-SLOW-NEXT: shll $8, %eax +; X64-SLOW-NEXT: orl %ecx, %eax +; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax +; X64-SLOW-NEXT: retq + %p0 = getelementptr i16, i16* %p, i32 0 + %p1 = getelementptr i16, i16* %p, i32 1 + %ld0 = load i16, i16 *%p0 + %ld1 = load i16, i16 *%p1 + %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8) + ret i16 %res +} + +define i32 @combine_fshl_load_i32(i32* %p) nounwind { +; X86-FAST-LABEL: combine_fshl_load_i32: +; X86-FAST: # %bb.0: +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-FAST-NEXT: movl 8(%eax), %ecx +; X86-FAST-NEXT: movl 12(%eax), %eax +; X86-FAST-NEXT: shldl $8, %ecx, %eax +; X86-FAST-NEXT: retl +; +; X86-SLOW-LABEL: combine_fshl_load_i32: +; X86-SLOW: # %bb.0: +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-NEXT: movl 11(%eax), %eax +; X86-SLOW-NEXT: retl +; +; X64-FAST-LABEL: combine_fshl_load_i32: +; X64-FAST: # %bb.0: +; X64-FAST-NEXT: movl 8(%rdi), %ecx +; X64-FAST-NEXT: movl 12(%rdi), %eax +; X64-FAST-NEXT: shldl $8, %ecx, %eax +; X64-FAST-NEXT: retq +; +; X64-SLOW-LABEL: combine_fshl_load_i32: +; X64-SLOW: # %bb.0: +; X64-SLOW-NEXT: movl 11(%rdi), %eax +; X64-SLOW-NEXT: retq + %p0 = getelementptr i32, i32* %p, i32 2 + %p1 = getelementptr i32, i32* %p, i32 3 + %ld0 = load i32, i32 *%p0 + %ld1 = load i32, i32 *%p1 + %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8) + ret i32 %res +} + +define i64 @combine_fshl_load_i64(i64* %p) nounwind { +; X86-FAST-LABEL: combine_fshl_load_i64: +; X86-FAST: # %bb.0: +; X86-FAST-NEXT: pushl %esi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movl 12(%ecx), %eax +; X86-FAST-NEXT: movl 16(%ecx), %esi +; X86-FAST-NEXT: movl 20(%ecx), %edx +; X86-FAST-NEXT: shldl $24, %esi, %edx +; X86-FAST-NEXT: 
shrdl $8, %esi, %eax +; X86-FAST-NEXT: popl %esi +; X86-FAST-NEXT: retl +; +; X86-SLOW-LABEL: combine_fshl_load_i64: +; X86-SLOW: # %bb.0: +; X86-SLOW-NEXT: pushl %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SLOW-NEXT: movl 20(%eax), %edx +; X86-SLOW-NEXT: movl 12(%eax), %ecx +; X86-SLOW-NEXT: movl 16(%eax), %esi +; X86-SLOW-NEXT: shrl $8, %ecx +; X86-SLOW-NEXT: movl %esi, %eax +; X86-SLOW-NEXT: shll $24, %eax +; X86-SLOW-NEXT: orl %ecx, %eax +; X86-SLOW-NEXT: shrl $8, %esi +; X86-SLOW-NEXT: shll $24, %edx +; X86-SLOW-NEXT: orl %esi, %edx +; X86-SLOW-NEXT: popl %esi +; X86-SLOW-NEXT: retl +; +; X64-FAST-LABEL: combine_fshl_load_i64: +; X64-FAST: # %bb.0: +; X64-FAST-NEXT: movq 8(%rdi), %rcx +; X64-FAST-NEXT: movq 16(%rdi), %rax +; X64-FAST-NEXT: shldq $24, %rcx, %rax +; X64-FAST-NEXT: retq +; +; X64-SLOW-LABEL: combine_fshl_load_i64: +; X64-SLOW: # %bb.0: +; X64-SLOW-NEXT: movq 8(%rdi), %rcx +; X64-SLOW-NEXT: movq 16(%rdi), %rax +; X64-SLOW-NEXT: shrq $40, %rcx +; X64-SLOW-NEXT: shlq $24, %rax +; X64-SLOW-NEXT: orq %rcx, %rax +; X64-SLOW-NEXT: retq + %p0 = getelementptr i64, i64* %p, i64 1 + %p1 = getelementptr i64, i64* %p, i64 2 + %ld0 = load i64, i64 *%p0 + %ld1 = load i64, i64 *%p1 + %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24) + ret i64 %res +} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}