NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
+ NODE_NAME_CASE(ATOMIC_CMP_SWAP)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
+ ATOMIC_CMP_SWAP,
LAST_AMDGPU_ISD_NUMBER
};
SDTypeProfile<0, 2, []>,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
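+// Compare-and-swap as a target memory node: operand 1 is the pointer and
+// operand 2 packs the {swap, compare} pair into a vector; the result is
+// the old memory value.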
+def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
+ SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+
def AMDGPUround : SDNode<"ISD::FROUND",
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
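+// Matches a global cmpxchg whose return value is unused, so instruction
+// selection can pick the no-return variant.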
+def atomic_cmp_swap_global_nortn : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_cmp_swap_global node:$ptr, node:$value),
+ [{ return SDValue(N, 0).use_empty(); }]
+>;
+
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr, vt:$data)),
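+// data_vt allows cmpswap patterns to take the packed {swap, compare}
+// pair (e.g. v2i32) while the result remains the scalar memory type vt.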
+class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
+ ValueType data_vt = vt> : Pat <
+ (vt (node i64:$addr, data_vt:$data)),
(inst $addr, $data, 0, 0)
>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
+
} // End Predicates = [isCIVI]
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
+  // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling and
+  // output demarshalling.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
+
+  // We can't return success/failure, only the old value, so let LLVM add
+  // the comparison.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();
}
}
+SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
+                                               SelectionDAG &DAG) const {
+ AtomicSDNode *AtomicNode = cast<AtomicSDNode>(Op);
+ assert(AtomicNode->isCompareAndSwap());
+ unsigned AS = AtomicNode->getAddressSpace();
+
+  // Address spaces other than global and flat (e.g. local) need no
+  // marshalling; leave the node as-is.
+ if (!isFlatGlobalAddrSpace(AS))
+ return Op;
+
+  // Global and flat address spaces need custom lowering: the compare and
+  // swap values are packed into a v2i32, or a v2i64 for the _X2 variants.
+ SDLoc DL(Op);
+ SDValue ChainIn = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(1);
+ SDValue Old = Op.getOperand(2);
+ SDValue New = Op.getOperand(3);
+ EVT VT = Op.getValueType();
+ MVT SimpleVT = VT.getSimpleVT();
+ MVT VecType = MVT::getVectorVT(SimpleVT, 2);
+
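+  // The instruction expects the swap value in the low lane and the compare
+  // value in the high lane, and returns the old memory value in the low
+  // lane.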
+  SDValue NewOld = DAG.getNode(ISD::BUILD_VECTOR, DL, VecType, New, Old);
+ SDValue Ops[] = { ChainIn, Addr, NewOld };
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL,
+ VTList, Ops, VT, AtomicNode->getMemOperand());
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
if (!Node->hasAnyUseOfValue(0)) {
MI->setDesc(TII->get(NoRetAtomicOp));
MI->RemoveOperand(0);
+ return;
}
+  // For mubuf_atomic_cmpswap, tablegen wraps the instruction in an
+  // extract_subreg, because the return type of these instructions is a vec2
+  // of the memory type, so that it can be tied to the data input operand.
+  // This means these instructions always have a use, so we need a special
+  // case: check whether the atomic's only use is a single extract_subreg
+  // that itself has no uses.
+  if (Node->hasNUsesOfValue(1, 0) &&
+      Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG &&
+      !Node->use_begin()->hasAnyUseOfValue(0)) {
+ unsigned Def = MI->getOperand(0).getReg();
+
+ // Change this into a noret atomic.
+ MI->setDesc(TII->get(NoRetAtomicOp));
+ MI->RemoveOperand(0);
+
+ // If we only remove the def operand from the atomic instruction, the
+ // extract_subreg will be left with a use of a vreg without a def.
+ // So we need to insert an implicit_def to avoid machine verifier
+ // errors.
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Def);
+ }
return;
}
}
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
-//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
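+// The X2 variant carries its v2i64 {swap, compare} pair in a 128-bit
+// register tuple (VReg_128).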
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
+ mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
+>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+
+multiclass MUBUFCmpSwapPat <Instruction inst_addr64, Instruction inst_offset,
+ SDPatternOperator node, ValueType data_vt,
+ ValueType node_vt> {
+
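+  // The RTN instructions return a vec2 of the memory type with the old
+  // value in the low half, so each pattern extracts sub0. The addr64
+  // patterns cover SI; CI and VI select the FLAT patterns instead.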
+let Predicates = [isSI] in {
+ def : Pat <
+ (node_vt (node (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), data_vt:$vdata_in)),
+ (EXTRACT_SUBREG
+ (inst_addr64 $vdata_in, $vaddr, $srsrc, $soffset, $offset, $slc), sub0)
+ >;
+}
+
+ def : Pat <
+ (node_vt (node (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+ i1:$slc), data_vt:$vdata_in)),
+ (EXTRACT_SUBREG
+ (inst_offset $vdata_in, $srsrc, $soffset, $offset, $slc), sub0)
+ >;
+}
+
+defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64,
+ BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET,
+ atomic_cmp_swap_global, v2i32, i32>;
+
+defm : MUBUFCmpSwapPat <BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64,
+ BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET,
+ atomic_cmp_swap_global, v2i64, i64>;
+
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
ret void
}
+; CMP_SWAP
+
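+; The no-return tests expect the cmpswap without glc; the _ret tests check
+; for glc and for the store of the returned old value.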
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
+entry:
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
+; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
+; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+ %0 = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
+entry:
+ %0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
+; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
+define void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
+; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword v[[RET]]
+define void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
+ %1 = extractvalue { i32, i1 } %0, 0
+ store i32 %1, i32 addrspace(1)* %out2
+ ret void
+}
+
; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {