let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let glc_value = 0;
let dlc_value = 0;
+ let IsAtomicNoRet = 1;
let AsmMatchConverter = "cvtMubufAtomic";
}
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
let glc_value = 1;
let dlc_value = 0;
+ let IsAtomicRet = 1;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
let AsmMatchConverter = "cvtMubufAtomicReturn";
let has_data1 = 0;
let has_vdst = 0;
+ let IsAtomicNoRet = 1;
}
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
" $addr, $data0, $data1$offset$gds"> {
let has_vdst = 0;
+ let IsAtomicNoRet = 1;
}
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
let hasPostISelHook = 1;
let has_data1 = 0;
+ let IsAtomicRet = 1;
}
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
" $vdst, $addr, $data0, $data1$offset$gds"> {
let hasPostISelHook = 1;
+ let IsAtomicRet = 1;
}
multiclass DS_1A2D_RET_mc<string opName,
let dlcValue = 0;
let has_vdst = 0;
let maybeAtomic = 1;
+ let IsAtomicNoRet = 1;
}
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let has_vdst = 1;
let glcValue = 1;
let dlcValue = 0;
+ let IsAtomicNoRet = 0;
+ let IsAtomicRet = 1;
let PseudoInstr = NAME # "_RTN";
}
bit Coordinates = 1;
bit LodOrClampOrMip = 0;
bit HasD16 = 0;
+ bit IsAtomicRet = 0;
}
def MIMGBaseOpcode : GenericEnum {
}
multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
- def "" : MIMGBaseOpcode {
- let Atomic = 1;
- let AtomicX2 = isCmpSwap;
- }
+ // Every variant defined below carries the IsAtomicRet TSFlag (no
+ // no-return MIMG atomic forms are defined here).
+ let IsAtomicRet = 1 in {
+ def "" : MIMGBaseOpcode {
+ let Atomic = 1;
+ let AtomicX2 = isCmpSwap;
+ }
- let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
- // _V* variants have different dst size, but the size is encoded implicitly,
- // using dmask and tfe. Only 32-bit variant is registered with disassembler.
- // Other variants are reconstructed by disassembler using dmask and tfe.
- let VDataDwords = !if(isCmpSwap, 2, 1) in
- defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
- let VDataDwords = !if(isCmpSwap, 4, 2) in
- defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
- }
+ let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
+ // _V* variants have different dst size, but the size is encoded implicitly,
+ // using dmask and tfe. Only 32-bit variant is registered with disassembler.
+ // Other variants are reconstructed by disassembler using dmask and tfe.
+ let VDataDwords = !if(isCmpSwap, 2, 1) in
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;
+ let VDataDwords = !if(isCmpSwap, 4, 2) in
+ defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;
+ }
+ } // End IsAtomicRet = 1
}
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
IsDOT = UINT64_C(1) << 55,
// FLAT instruction accesses FLAT_SCRATCH segment.
- IsFlatScratch = UINT64_C(1) << 56
+ IsFlatScratch = UINT64_C(1) << 56,
+
+ // Atomic without return.
+ IsAtomicNoRet = UINT64_C(1) << 57,
+
+ // Atomic with return.
+ IsAtomicRet = UINT64_C(1) << 58
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
return false;
if (!MI.mayLoad() || MI.mayStore())
return false;
- if (AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1 ||
- AMDGPU::getAtomicRetOp(MI.getOpcode()) != -1)
+ if (SIInstrInfo::isAtomic(MI))
return false;
if (IsVMEMClause && !isVMEMClauseInst(MI))
return false;
AMDGPU::OpName::data1),
CurrScore);
}
- } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
+ } else if (SIInstrInfo::isAtomicRet(Inst) &&
Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
CurrScore);
- } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
} else if (TII->isMIMG(Inst)) {
if (Inst.mayStore()) {
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
} else if (TII->isMUBUF(Inst)) {
if (Inst.mayStore()) {
setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
- } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ } else if (SIInstrInfo::isAtomicRet(Inst)) {
setExpScore(
&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
++FlatASCount;
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
- else if (Inst.mayLoad() &&
- AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
+ else if (Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst))
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
else
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
if (!ST->hasVscnt())
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
- else if ((Inst.mayLoad() &&
- AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
+ else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) ||
/* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
(TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
if (ST->vmemWriteNeedsExpWaitcnt() &&
- (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
+ (Inst.mayStore() || SIInstrInfo::isAtomicRet(Inst))) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
} else if (TII->isSMRD(Inst)) {
// Must be 0 for non-FLAT instructions.
field bit IsFlatScratch = 0;
+ // Atomic without a return.
+ field bit IsAtomicNoRet = 0;
+
+ // Atomic with return.
+ field bit IsAtomicRet = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
let TSFlags{56} = IsFlatScratch;
+ let TSFlags{57} = IsAtomicNoRet;
+
+ let TSFlags{58} = IsAtomicRet;
+
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;
return get(Opcode).TSFlags & SIInstrFlags::EXP;
}
+ /// \returns true if \p MI is an atomic without a return value
+ /// (the SIInstrFlags::IsAtomicNoRet TSFlag is set).
+ static bool isAtomicNoRet(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
+ }
+
+ /// \returns true if \p Opcode is an atomic without a return value
+ /// (the SIInstrFlags::IsAtomicNoRet TSFlag is set).
+ bool isAtomicNoRet(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
+ }
+
+ /// \returns true if \p MI is an atomic with a return value
+ /// (the SIInstrFlags::IsAtomicRet TSFlag is set).
+ static bool isAtomicRet(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
+ }
+
+ /// \returns true if \p Opcode is an atomic with a return value
+ /// (the SIInstrFlags::IsAtomicRet TSFlag is set).
+ bool isAtomicRet(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
+ }
+
+ /// \returns true if \p MI is an atomic of either flavor: with or
+ /// without a return value (either atomic TSFlag is set).
+ static bool isAtomic(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
+ SIInstrFlags::IsAtomicNoRet);
+ }
+
+ /// \returns true if \p Opcode is an atomic of either flavor: with or
+ /// without a return value (either atomic TSFlag is set).
+ bool isAtomic(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
+ SIInstrFlags::IsAtomicNoRet);
+ }
+
static bool isWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
}
LLVM_READONLY
int getMUBUFNoLdsInst(uint16_t Opcode);
- LLVM_READONLY
- int getAtomicRetOp(uint16_t Opcode);
-
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
let ValueCols = [["0"]];
}
-// Maps an atomic opcode to its version with a return value.
-def getAtomicRetOp : InstrMapping {
- let FilterClass = "AtomicNoRet";
- let RowFields = ["NoRetOp"];
- let ColFields = ["IsRet"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
let FilterClass = "AtomicNoRet";
/// Return true iff instruction \p MI is a atomic instruction that
/// returns a result.
bool isAtomicRet(const MachineInstr &MI) const {
- return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1;
+ return SIInstrInfo::isAtomicRet(MI);
}
/// Removes all processed atomic pseudo instructions from the current
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: s_endpgm
;
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm
; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1
-; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-WGP-NEXT: buffer_gl0_inv
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
; GFX10-WGP-NEXT: s_endpgm