return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
+def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
+ (atomic_load_add node:$ptr, node:$value), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
switch (MI->getOpcode()) {
default:
- if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::LDS_1A) {
- MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(MI->getOpcode()),
- AMDGPU::OQAP);
+ if (TII->isLDSInstr(MI->getOpcode()) &&
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
+ int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineInstrBuilder NewMI;
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
+ AMDGPU::OQAP);
+ TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
+ MI->getOperand(0).getReg(),
+ AMDGPU::OQAP);
+ } else {
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
+ TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
+ }
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
NewMI.addOperand(MI->getOperand(i));
}
- TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- AMDGPU::OQAP);
} else {
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
+namespace AMDGPU {
+
+int getLDSNoRetOp(uint16_t Opcode);
+
+} //End namespace AMDGPU
+
} // End llvm namespace
#endif // R600INSTRINFO_H_
let DisableEncoding = "$dst";
}
-class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
+class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
R600_LDS <
- lds_op,
- (outs),
+ lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel,
BANK_SWIZZLE:$bank_swizzle),
- " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel",
+ " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
pattern
> {
+ field string BaseOp;
+
let src2 = 0;
let src2_rel = 0;
let LDS_1A1D = 1;
}
+class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+ let Defs = [OQAP];
+}
+
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
R600_LDS <
lds_op,
let LDS_1A2D = 1;
}
-def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
+def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
[(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
>;
-def LDS_BYTE_WRITE : R600_LDS_1A1D<0x12, "LDS_BYTE_WRITE",
+def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
[(truncstorei8_local i32:$src1, i32:$src0)]
>;
-def LDS_SHORT_WRITE : R600_LDS_1A1D<0x13, "LDS_SHORT_WRITE",
+def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
[(truncstorei16_local i32:$src1, i32:$src0)]
>;
+def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
+ [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
+>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
+
+def getLDSNoRetOp : InstrMapping {
+ let FilterClass = "R600_LDS_1A1D";
+ let RowFields = ["BaseOp"];
+ let ColFields = ["DisableEncoding"];
+ let KeyCol = ["$dst"];
+ let ValueCols = [[""""]];
+}
let vdst = 0;
}
+class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS <
+ op,
+ (outs rc:$vdst),
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0,
+ i8imm:$offset1),
+ asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+ let data1 = 0;
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
} // End isCompare = 1
+def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
def : DSWritePat <DS_WRITE_B32, i32, local_store>;
+def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
+ (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
+
/********** ================== **********/
/********** SMRD Patterns **********/
/********** ================== **********/
NeedWQM = true;
// Fall through
case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_ADD_U32_RTN:
NeedM0 = true;
break;
--- /dev/null
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+
+; R600-CHECK-LABEL: @atomic_add_local
+; R600-CHECK: LDS_ADD *
+; SI-CHECK-LABEL: @atomic_add_local
+; SI-CHECK: DS_ADD_U32_RTN 0
+define void @atomic_add_local(i32 addrspace(3)* %local) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+ ret void
+}
+
+; R600-CHECK-LABEL: @atomic_add_ret_local
+; R600-CHECK: LDS_ADD_RET *
+; SI-CHECK-LABEL: @atomic_add_ret_local
+; SI-CHECK: DS_ADD_U32_RTN 0
+define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+entry:
+ %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}