R600/SI: Add 32-bit LDS atomic cmpxchg
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 11 Jun 2014 18:08:48 +0000 (18:08 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 11 Jun 2014 18:08:48 +0000 (18:08 +0000)
llvm-svn: 210678

llvm/lib/Target/R600/AMDGPUInstructions.td
llvm/lib/Target/R600/SIInstructions.td
llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll [new file with mode: 0644]

index 1faf9ea..0a103e8 100644 (file)
@@ -257,6 +257,15 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
+// Pattern fragment matching an atomic_cmp_swap node (operands: pointer,
+// compare value, swap value) restricted by the predicate below: the node's
+// memory VT must be i32 and its address space must be
+// AMDGPUAS::LOCAL_ADDRESS.
+def atomic_cmp_swap_32_local :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  AtomicSDNode *AN = cast<AtomicSDNode>(N);
+  return AN->getMemoryVT() == MVT::i32 &&
+         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+
 class Constants {
 int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
index f6dc07f..c4360f7 100644 (file)
@@ -2250,6 +2250,21 @@ defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
 defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
 
 
+// Selection patterns mapping the compare-and-swap fragment `frag`, operating
+// on values of type `vt`, onto the DS instruction `inst`.
+// NOTE(review): the leading 0 operand passed to `inst` is presumably the GDS
+// flag of the DS encoding - confirm against the DS instruction definition.
+multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
+  // Address of the form (add ptr, imm) where imm satisfies IMM16bit: the add
+  // is folded and the immediate becomes the trailing offset operand.
+  def : Pat <
+    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
+    (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+  >;
+
+  // Bare pointer with no immediate to fold: the offset operand is 0.
+  def : Pat <
+    (frag i32:$ptr, vt:$cmp, vt:$swap),
+    (inst 0, $ptr, $cmp, $swap, 0)
+  >;
+}
+
+// Select DS_CMPST_RTN_B32 for i32 compare-and-swap matched by
+// atomic_cmp_swap_32_local.
+defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+
+
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
new file mode 100644 (file)
index 0000000..7ab651f
--- /dev/null
@@ -0,0 +1,16 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; The cmpxchg below targets element 4 of an i32 addrspace(3) pointer, i.e. a
+; 16-byte offset from %ptr. The checks expect that offset to appear as the
+; immediate 0x10 on DS_CMPST_RTN_B32 rather than as a separate add, with the
+; constant compare value 7 and the loaded pointer and swap values each moved
+; into a v register first.
+
+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
+; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}