-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86
; Make sure that flags are properly preserved despite atomic optimizations.
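+; The atomic load/add/store sequences below are folded into single RMW
+; instructions (incb / addb $2) that themselves write EFLAGS, so the compare
+; feeding both branches has to be re-emitted after the fold instead of being
+; kept live across it.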
define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
-; CHECK-LABEL: atomic_and_flags_1:
-
+; X64-LABEL: atomic_and_flags_1:
+; X64: # %bb.0:
+; X64-NEXT: cmpl %edx, %esi
+; X64-NEXT: jne .LBB0_3
+; X64-NEXT: # %bb.1: # %L1
+; X64-NEXT: incb (%rdi)
+; X64-NEXT: cmpl %edx, %esi
+; X64-NEXT: jne .LBB0_2
+; X64-NEXT: # %bb.4: # %L3
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_3: # %L2
+; X64-NEXT: movl $2, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_2: # %L4
+; X64-NEXT: movl $4, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_and_flags_1:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: jne .LBB0_3
+; X86-NEXT: # %bb.1: # %L1
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: incb (%edx)
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: jne .LBB0_2
+; X86-NEXT: # %bb.4: # %L3
+; X86-NEXT: movl $3, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_3: # %L2
+; X86-NEXT: movl $2, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_2: # %L4
+; X86-NEXT: movl $4, %eax
+; X86-NEXT: retl
; Generate the flags value, and use it.
- ; CHECK: cmpl
- ; CHECK-NEXT: jne
%cmp = icmp eq i32 %a, %b
br i1 %cmp, label %L1, label %L2
L1:
; The following pattern will get folded.
- ; CHECK: incb
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
store atomic i8 %2, i8* %p release, align 1
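+; The three instructions above fold into a single flag-setting `incb` RMW on
+; the memory operand (see the incb in the checks above).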
; Use the comparison result again. We need to rematerialize the comparison
; somehow. This test checks that cmpl gets emitted again, but any
; rematerialization would work (the optimizer used to clobber the flags with
; the add).
- ; CHECK-NEXT: cmpl
- ; CHECK-NEXT: jne
br i1 %cmp, label %L3, label %L4
L2:
ret i32 2
L3:
ret i32 3
L4:
ret i32 4
}
; Same as above, but using 2 as the immediate to avoid the INC instruction.
define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
-; CHECK-LABEL: atomic_and_flags_2:
- ; CHECK: cmpl
- ; CHECK-NEXT: jne
+; X64-LABEL: atomic_and_flags_2:
+; X64: # %bb.0:
+; X64-NEXT: cmpl %edx, %esi
+; X64-NEXT: jne .LBB1_3
+; X64-NEXT: # %bb.1: # %L1
+; X64-NEXT: addb $2, (%rdi)
+; X64-NEXT: cmpl %edx, %esi
+; X64-NEXT: jne .LBB1_2
+; X64-NEXT: # %bb.4: # %L3
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB1_3: # %L2
+; X64-NEXT: movl $2, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB1_2: # %L4
+; X64-NEXT: movl $4, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_and_flags_2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: jne .LBB1_3
+; X86-NEXT: # %bb.1: # %L1
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: addb $2, (%edx)
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: jne .LBB1_2
+; X86-NEXT: # %bb.4: # %L3
+; X86-NEXT: movl $3, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB1_3: # %L2
+; X86-NEXT: movl $2, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB1_2: # %L4
+; X86-NEXT: movl $4, %eax
+; X86-NEXT: retl
%cmp = icmp eq i32 %a, %b
br i1 %cmp, label %L1, label %L2
L1:
- ; CHECK: addb
%1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
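+; Here the fold produces `addb $2` rather than `incb`; the checks above verify
+; that the cmpl is still re-emitted before the second branch.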
- ; CHECK-NEXT: cmpl
- ; CHECK-NEXT: jne
br i1 %cmp, label %L3, label %L4
L2:
ret i32 2