From 84abec2855ad9803104b93e0893dc2a7191b1fd6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 13 Mar 2019 18:48:50 +0000 Subject: [PATCH] [X86] Check for 64-bit mode in X86Subtarget::hasCmpxchg16b() The feature flag alone can't be trusted since it can be passed via -mattr. Need to ensure 64-bit mode as well. We had a 64 bit mode check on the instruction to make the assembler work correctly. But we weren't guarding any of our lowering code or the hooks for the AtomicExpandPass. I've added 32-bit command lines to atomic128.ll with and without cx16. The tests there would all previously fail if -mattr=cx16 was passed to them. I had to move one test case for f128 to a new file as it seems to have a different 32-bit mode or possibly sse issue. Differential Revision: https://reviews.llvm.org/D59308 llvm-svn: 356078 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + llvm/lib/Target/X86/X86InstrInfo.td | 2 +- llvm/lib/Target/X86/X86Subtarget.h | 2 +- llvm/test/CodeGen/X86/atomic128.ll | 578 ++++++++++++++++++++++++++++++-- llvm/test/CodeGen/X86/atomicf128.ll | 27 ++ 5 files changed, 587 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/X86/atomicf128.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d881615..ef6622e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27209,6 +27209,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT T = N->getValueType(0); assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; + assert((!Regs64bit || Subtarget.hasCmpxchg16b()) && + "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); MVT HalfT = Regs64bit ? 
MVT::i64 : MVT::i32; SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 5cedac3..af17bdd 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -2094,7 +2094,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", []>, - TB, Requires<[HasCmpxchg16b, In64BitMode]>; + TB, Requires<[HasCmpxchg16b, In64BitMode]>; } // SchedRW, mayLoad, mayStore, hasSideEffects diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 6e2e470..ceb1e86 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -620,7 +620,7 @@ public: int getGatherOverhead() const { return GatherOverhead; } int getScatterOverhead() const { return ScatterOverhead; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } - bool hasCmpxchg16b() const { return HasCmpxchg16b; } + bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); } bool useLeaForSP() const { return UseLeaForSP; } bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; } bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; } diff --git a/llvm/test/CodeGen/X86/atomic128.ll b/llvm/test/CodeGen/X86/atomic128.ll index c3fb5ab..af32843 100644 --- a/llvm/test/CodeGen/X86/atomic128.ll +++ b/llvm/test/CodeGen/X86/atomic128.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s +; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck %s -check-prefixes=CHECK32 +; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=-cx16 | FileCheck %s -check-prefixes=CHECK32 @var = 
global i128 0 @@ -18,6 +20,61 @@ define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: val_compare_and_swap: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: subl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -12 +; CHECK32-NEXT: .cfi_offset %edi, -8 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_val_compare_and_swap_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $44, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -44 +; CHECK32-NEXT: movl (%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK32-NEXT: movl %edi, 8(%esi) +; CHECK32-NEXT: movl %edx, 12(%esi) +; CHECK32-NEXT: movl %eax, (%esi) +; CHECK32-NEXT: movl %ecx, 4(%esi) +; 
CHECK32-NEXT: movl %esi, %eax +; CHECK32-NEXT: addl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl $4 %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire %val = extractvalue { i128, i1 } %pair, 0 ret i128 %val @@ -48,6 +105,46 @@ define void @fetch_and_nand(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_nand: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_nand_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; 
CHECK32-NEXT: retl %val = atomicrmw nand i128* %p, i128 %bits release store i128 %val, i128* @var, align 16 ret void @@ -76,6 +173,46 @@ define void @fetch_and_or(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_or: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_or_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw or i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -104,6 +241,46 @@ define void @fetch_and_add(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; 
+; CHECK32-LABEL: fetch_and_add: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_add_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw add i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -132,6 +309,46 @@ define void @fetch_and_sub(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_sub: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; 
CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_sub_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw sub i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -163,6 +380,46 @@ define void @fetch_and_min(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_min: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl 
{{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_min_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw min i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -194,6 +451,46 @@ define void @fetch_and_max(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_max: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: 
.cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_max_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw max i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -225,6 +522,46 @@ define void @fetch_and_umin(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_umin: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_umin_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw umin i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -256,6 +593,46 @@ define void @fetch_and_umax(i128* %p, i128 %bits) { ; CHECK-NEXT: movq %rdx, _var+{{.*}}(%rip) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: fetch_and_umax: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: subl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -8 +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_fetch_and_umax_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $28, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: movl %esi, var+8 +; CHECK32-NEXT: movl %edx, var+12 +; CHECK32-NEXT: movl %eax, var +; CHECK32-NEXT: movl %ecx, var+4 +; CHECK32-NEXT: 
addl $24, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl %val = atomicrmw umax i128* %p, i128 %bits seq_cst store i128 %val, i128* @var, align 16 ret void @@ -274,6 +651,61 @@ define i128 @atomic_load_seq_cst(i128* %p) { ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: atomic_load_seq_cst: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: subl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -12 +; CHECK32-NEXT: .cfi_offset %edi, -8 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_val_compare_and_swap_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $44, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -44 +; CHECK32-NEXT: movl (%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK32-NEXT: movl %edi, 8(%esi) +; CHECK32-NEXT: movl %edx, 
12(%esi) +; CHECK32-NEXT: movl %eax, (%esi) +; CHECK32-NEXT: movl %ecx, 4(%esi) +; CHECK32-NEXT: movl %esi, %eax +; CHECK32-NEXT: addl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl $4 %r = load atomic i128, i128* %p seq_cst, align 16 ret i128 %r } @@ -291,6 +723,61 @@ define i128 @atomic_load_relaxed(i128* %p) { ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: atomic_load_relaxed: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: pushl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: subl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -12 +; CHECK32-NEXT: .cfi_offset %edi, -8 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl $0 +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_val_compare_and_swap_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $44, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -44 +; CHECK32-NEXT: movl (%esp), %eax +; CHECK32-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK32-NEXT: movl %edi, 8(%esi) +; CHECK32-NEXT: movl %edx, 12(%esi) +; CHECK32-NEXT: movl %eax, (%esi) +; CHECK32-NEXT: movl %ecx, 4(%esi) +; CHECK32-NEXT: movl %esi, %eax +; CHECK32-NEXT: addl $20, %esp +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: popl %esi +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %edi +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl $4 %r = load atomic i128, i128* %p monotonic, align 16 ret i128 %r } @@ -313,6 +800,29 @@ define void @atomic_store_seq_cst(i128* %p, i128 %in) { ; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: atomic_store_seq_cst: +; CHECK32: # %bb.0: +; CHECK32-NEXT: subl $36, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 36 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_lock_test_and_set_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $56, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -56 +; CHECK32-NEXT: retl store atomic i128 %in, i128* %p seq_cst, align 16 ret void } @@ -335,6 +845,29 @@ define void @atomic_store_release(i128* %p, i128 %in) { ; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: atomic_store_release: +; CHECK32: # %bb.0: +; CHECK32-NEXT: subl $36, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 36 +; CHECK32-NEXT: leal 
{{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_lock_test_and_set_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $56, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -56 +; CHECK32-NEXT: retl store atomic i128 %in, i128* %p release, align 16 ret void } @@ -357,30 +890,31 @@ define void @atomic_store_relaxed(i128* %p, i128 %in) { ; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq +; +; CHECK32-LABEL: atomic_store_relaxed: +; CHECK32: # %bb.0: +; CHECK32-NEXT: subl $36, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset 36 +; CHECK32-NEXT: leal {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl {{[0-9]+}}(%esp) +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll __sync_lock_test_and_set_16 +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: addl $56, %esp +; CHECK32-NEXT: .cfi_adjust_cfa_offset -56 +; CHECK32-NEXT: retl store atomic i128 %in, i128* %p unordered, align 16 ret void } -@fsc128 = external global fp128 - -define void @atomic_fetch_swapf128(fp128 %x) nounwind { -; CHECK-LABEL: atomic_fetch_swapf128: -; CHECK: ## %bb.0: -; 
CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi -; CHECK-NEXT: movq (%rsi), %rax -; CHECK-NEXT: movq 8(%rsi), %rdx -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB14_1: ## %atomicrmw.start -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lock cmpxchg16b (%rsi) -; CHECK-NEXT: jne LBB14_1 -; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq - %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire - ret void -} diff --git a/llvm/test/CodeGen/X86/atomicf128.ll b/llvm/test/CodeGen/X86/atomicf128.ll new file mode 100644 index 0000000..6455fc5 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomicf128.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s + +; FIXME: This test has a fatal error in 32-bit mode + +@fsc128 = external global fp128 + +define void @atomic_fetch_swapf128(fp128 %x) nounwind { +; CHECK-LABEL: atomic_fetch_swapf128: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rsi, %rcx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi +; CHECK-NEXT: movq (%rsi), %rax +; CHECK-NEXT: movq 8(%rsi), %rdx +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_1: ## %atomicrmw.start +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lock cmpxchg16b (%rsi) +; CHECK-NEXT: jne LBB0_1 +; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire + ret void +} -- 2.7.4