From 376c87fcd42bbc5a4689a5f3d0efef4a654eb618 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 19 Mar 2019 16:46:56 +0000
Subject: [PATCH] [Tests] Update to newer ISA

There are some issues w/missed opts on older platforms, but that's not
the purpose of this test.  Using a newer ISA points out that some TODOs
are already handled, and allows addition of tests to exercise other
issues (future patch).

llvm-svn: 356473
---
 llvm/test/CodeGen/X86/atomic-unordered.ll | 246 ++++++++++++++++++++++--------
 1 file changed, 179 insertions(+), 67 deletions(-)

diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index ba77d53..aef99b7 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O3 %s
 
 define i8 @load_i8(i8* %ptr) {
 ; CHECK-O0-LABEL: load_i8:
@@ -276,18 +276,16 @@ define void @widen_broadcast_unaligned(i32* %p0, i32 %v) {
 define void @vec_store(i32* %p0, <2 x i32> %vec) {
 ; CHECK-O0-LABEL: vec_store:
 ; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movd %xmm0, %eax
-; CHECK-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-O0-NEXT: movd %xmm0, %ecx
+; CHECK-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-O0-NEXT: vpextrd $2, %xmm0, %ecx
 ; CHECK-O0-NEXT: movl %eax, (%rdi)
 ; CHECK-O0-NEXT: movl %ecx, 4(%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: vec_store:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movd %xmm0, %eax
-; CHECK-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-O3-NEXT: movd %xmm0, %ecx
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-O3-NEXT: vpextrd $2, %xmm0, %ecx
 ; CHECK-O3-NEXT: movl %eax, (%rdi)
 ; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
 ; CHECK-O3-NEXT: retq
@@ -303,18 +301,16 @@ define void @vec_store(i32* %p0, <2 x i32> %vec) {
 define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
 ; CHECK-O0-LABEL: vec_store_unaligned:
 ; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movd %xmm0, %eax
-; CHECK-O0-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-O0-NEXT: movd %xmm0, %ecx
+; CHECK-O0-NEXT: vmovd %xmm0, %eax
+; CHECK-O0-NEXT: vpextrd $2, %xmm0, %ecx
 ; CHECK-O0-NEXT: movl %eax, (%rdi)
 ; CHECK-O0-NEXT: movl %ecx, 4(%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: vec_store_unaligned:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movd %xmm0, %eax
-; CHECK-O3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-O3-NEXT: movd %xmm0, %ecx
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-O3-NEXT: vpextrd $2, %xmm0, %ecx
 ; CHECK-O3-NEXT: movl %eax, (%rdi)
 ; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
 ; CHECK-O3-NEXT: retq
@@ -333,14 +329,14 @@ define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) {
 define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
 ; CHECK-O0-LABEL: widen_broadcast2:
 ; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movd %xmm0, %eax
+; CHECK-O0-NEXT: vmovd %xmm0, %eax
 ; CHECK-O0-NEXT: movl %eax, (%rdi)
 ; CHECK-O0-NEXT: movl %eax, 4(%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: widen_broadcast2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movd %xmm0, %eax
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
 ; CHECK-O3-NEXT: movl %eax, (%rdi)
 ; CHECK-O3-NEXT: movl %eax, 4(%rdi)
 ; CHECK-O3-NEXT: retq
@@ -355,14 +351,14 @@ define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) {
 define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) {
 ; CHECK-O0-LABEL: widen_broadcast2_unaligned:
 ; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movd %xmm0, %eax
+; CHECK-O0-NEXT: vmovd %xmm0, %eax
 ; CHECK-O0-NEXT: movl %eax, (%rdi)
 ; CHECK-O0-NEXT: movl %eax, 4(%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: widen_broadcast2_unaligned:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movd %xmm0, %eax
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
 ; CHECK-O3-NEXT: movl %eax, (%rdi)
 ; CHECK-O3-NEXT: movl %eax, 4(%rdi)
 ; CHECK-O3-NEXT: retq
@@ -622,9 +618,20 @@ define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
 ; CHECK-O3-LABEL: load_fold_sdiv2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB31_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
 ; CHECK-O3-NEXT: idivq %rsi
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB31_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = sdiv i64 %v, %v2
 ret i64 %ret
@@ -641,8 +648,20 @@ define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_sdiv3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq (%rsi), %rcx
+; CHECK-O3-NEXT: movq %rax, %rdx
+; CHECK-O3-NEXT: orq %rcx, %rdx
+; CHECK-O3-NEXT: shrq $32, %rdx
+; CHECK-O3-NEXT: je .LBB32_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
-; CHECK-O3-NEXT: idivq (%rsi)
+; CHECK-O3-NEXT: idivq %rcx
+; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB32_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %ecx
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -686,9 +705,20 @@ define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
 ; CHECK-O3-LABEL: load_fold_udiv2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB34_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: xorl %edx, %edx
 ; CHECK-O3-NEXT: divq %rsi
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB34_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = udiv i64 %v, %v2
 ret i64 %ret
@@ -706,8 +736,20 @@ define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_udiv3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq (%rsi), %rcx
+; CHECK-O3-NEXT: movq %rax, %rdx
+; CHECK-O3-NEXT: orq %rcx, %rdx
+; CHECK-O3-NEXT: shrq $32, %rdx
+; CHECK-O3-NEXT: je .LBB35_1
+; CHECK-O3-NEXT: # %bb.2:
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divq %rcx
+; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB35_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
 ; CHECK-O3-NEXT: xorl %edx, %edx
-; CHECK-O3-NEXT: divq (%rsi)
+; CHECK-O3-NEXT: divl %ecx
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -760,10 +802,21 @@ define i64 @load_fold_srem2(i64* %p, i64 %v2) {
 ; CHECK-O3-LABEL: load_fold_srem2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB37_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
 ; CHECK-O3-NEXT: idivq %rsi
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB37_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: movl %edx, %eax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = srem i64 %v, %v2
 ret i64 %ret
@@ -781,10 +834,22 @@ define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_srem3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq (%rsi), %rcx
+; CHECK-O3-NEXT: movq %rax, %rdx
+; CHECK-O3-NEXT: orq %rcx, %rdx
+; CHECK-O3-NEXT: shrq $32, %rdx
+; CHECK-O3-NEXT: je .LBB38_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
-; CHECK-O3-NEXT: idivq (%rsi)
+; CHECK-O3-NEXT: idivq %rcx
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB38_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %ecx
+; CHECK-O3-NEXT: movl %edx, %eax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
 %ret = srem i64 %v, %v2
@@ -834,10 +899,21 @@ define i64 @load_fold_urem2(i64* %p, i64 %v2) {
 ; CHECK-O3-LABEL: load_fold_urem2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB40_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: xorl %edx, %edx
 ; CHECK-O3-NEXT: divq %rsi
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB40_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: movl %edx, %eax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = urem i64 %v, %v2
 ret i64 %ret
@@ -856,10 +932,22 @@ define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_urem3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq (%rsi), %rcx
+; CHECK-O3-NEXT: movq %rax, %rdx
+; CHECK-O3-NEXT: orq %rcx, %rdx
+; CHECK-O3-NEXT: shrq $32, %rdx
+; CHECK-O3-NEXT: je .LBB41_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: xorl %edx, %edx
-; CHECK-O3-NEXT: divq (%rsi)
+; CHECK-O3-NEXT: divq %rcx
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB41_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %ecx
+; CHECK-O3-NEXT: movl %edx, %eax
+; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
 %ret = urem i64 %v, %v2
@@ -885,7 +973,6 @@ define i64 @load_fold_shl1(i64* %p) {
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_shl2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_shl2:
 ; CHECK-O0: # %bb.0:
@@ -898,17 +985,13 @@ define i64 @load_fold_shl2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_shl2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shlq %cl, %rax
+; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = shl i64 %v, %v2
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_shl3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_shl3:
 ; CHECK-O0: # %bb.0:
@@ -921,10 +1004,8 @@ define i64 @load_fold_shl3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_shl3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shlq %cl, %rax
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -951,7 +1032,6 @@ define i64 @load_fold_lshr1(i64* %p) {
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_lshr2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_lshr2:
 ; CHECK-O0: # %bb.0:
@@ -964,17 +1044,13 @@ define i64 @load_fold_lshr2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_lshr2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shrq %cl, %rax
+; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = lshr i64 %v, %v2
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_lshr3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_lshr3:
 ; CHECK-O0: # %bb.0:
@@ -987,10 +1063,8 @@ define i64 @load_fold_lshr3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_lshr3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shrq %cl, %rax
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1017,7 +1091,6 @@ define i64 @load_fold_ashr1(i64* %p) {
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_ashr2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_ashr2:
 ; CHECK-O0: # %bb.0:
@@ -1030,17 +1103,13 @@ define i64 @load_fold_ashr2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_ashr2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: sarq %cl, %rax
+; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p unordered, align 8
 %ret = ashr i64 %v, %v2
 ret i64 %ret
 }
 
-; Legal to fold (TODO)
 define i64 @load_fold_ashr3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_ashr3:
 ; CHECK-O0: # %bb.0:
@@ -1053,10 +1122,8 @@ define i64 @load_fold_ashr3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_ashr3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: sarq %cl, %rax
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax
 ; CHECK-O3-NEXT: retq
 %v = load atomic i64, i64* %p1 unordered, align 8
 %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1467,10 +1534,22 @@ define void @rmw_fold_sdiv2(i64* %p, i64 %v) {
 ; CHECK-O3-LABEL: rmw_fold_sdiv2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB70_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
 ; CHECK-O3-NEXT: idivq %rsi
 ; CHECK-O3-NEXT: movq %rax, (%rdi)
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB70_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
+; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
 %val = sdiv i64 %prev, %v
 store atomic i64 %val, i64* %p unordered, align 8
@@ -1517,10 +1596,22 @@ define void @rmw_fold_udiv2(i64* %p, i64 %v) {
 ; CHECK-O3-LABEL: rmw_fold_udiv2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB72_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: xorl %edx, %edx
 ; CHECK-O3-NEXT: divq %rsi
 ; CHECK-O3-NEXT: movq %rax, (%rdi)
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB72_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax
+; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
 %val = udiv i64 %prev, %v
 store atomic i64 %val, i64* %p unordered, align 8
@@ -1583,10 +1674,22 @@ define void @rmw_fold_srem2(i64* %p, i64 %v) {
 ; CHECK-O3-LABEL: rmw_fold_srem2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB74_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: cqto
 ; CHECK-O3-NEXT: idivq %rsi
 ; CHECK-O3-NEXT: movq %rdx, (%rdi)
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB74_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
+; CHECK-O3-NEXT: movq %rdx, (%rdi)
+; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
 %val = srem i64 %prev, %v
 store atomic i64 %val, i64* %p unordered, align 8
@@ -1642,10 +1745,22 @@ define void @rmw_fold_urem2(i64* %p, i64 %v) {
 ; CHECK-O3-LABEL: rmw_fold_urem2:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, %rcx
+; CHECK-O3-NEXT: orq %rsi, %rcx
+; CHECK-O3-NEXT: shrq $32, %rcx
+; CHECK-O3-NEXT: je .LBB76_1
+; CHECK-O3-NEXT: # %bb.2:
 ; CHECK-O3-NEXT: xorl %edx, %edx
 ; CHECK-O3-NEXT: divq %rsi
 ; CHECK-O3-NEXT: movq %rdx, (%rdi)
 ; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB76_1:
+; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-O3-NEXT: xorl %edx, %edx
+; CHECK-O3-NEXT: divl %esi
+; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx
+; CHECK-O3-NEXT: movq %rdx, (%rdi)
+; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
 %val = urem i64 %prev, %v
 store atomic i64 %val, i64* %p unordered, align 8
@@ -1680,16 +1795,15 @@ define void @rmw_fold_shl2(i64* %p, i64 %v) {
 ; CHECK-O0: # %bb.0:
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: movb %sil, %cl
-; CHECK-O0-NEXT: shlq %cl, %rax
+; CHECK-O0-NEXT: # implicit-def: $rsi
+; CHECK-O0-NEXT: movb %cl, %sil
+; CHECK-O0-NEXT: shlxq %rsi, %rax, %rax
 ; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: rmw_fold_shl2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shlq %cl, %rax
+; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: movq %rax, (%rdi)
 ; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
@@ -1726,16 +1840,15 @@ define void @rmw_fold_lshr2(i64* %p, i64 %v) {
 ; CHECK-O0: # %bb.0:
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: movb %sil, %cl
-; CHECK-O0-NEXT: shrq %cl, %rax
+; CHECK-O0-NEXT: # implicit-def: $rsi
+; CHECK-O0-NEXT: movb %cl, %sil
+; CHECK-O0-NEXT: shrxq %rsi, %rax, %rax
 ; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: rmw_fold_lshr2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: shrq %cl, %rax
+; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: movq %rax, (%rdi)
 ; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
@@ -1772,16 +1885,15 @@ define void @rmw_fold_ashr2(i64* %p, i64 %v) {
 ; CHECK-O0: # %bb.0:
 ; CHECK-O0-NEXT: movq (%rdi), %rax
 ; CHECK-O0-NEXT: movb %sil, %cl
-; CHECK-O0-NEXT: sarq %cl, %rax
+; CHECK-O0-NEXT: # implicit-def: $rsi
+; CHECK-O0-NEXT: movb %cl, %sil
+; CHECK-O0-NEXT: sarxq %rsi, %rax, %rax
 ; CHECK-O0-NEXT: movq %rax, (%rdi)
 ; CHECK-O0-NEXT: retq
 ;
 ; CHECK-O3-LABEL: rmw_fold_ashr2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq %rsi, %rcx
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-NEXT: sarq %cl, %rax
+; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
 ; CHECK-O3-NEXT: movq %rax, (%rdi)
 ; CHECK-O3-NEXT: retq
 %prev = load atomic i64, i64* %p unordered, align 8
-- 
2.7.4