return true;
}
+bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
+ bool IsAppend) const {
+ Register PtrBase = MI.getOperand(2).getReg();
+ LLT PtrTy = MRI->getType(PtrBase);
+ bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ unsigned Offset;
+ std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
+
+ // TODO: Should this try to look through readfirstlane like GWS?
+ if (!isDSOffsetLegal(PtrBase, Offset, 16)) {
+ PtrBase = MI.getOperand(2).getReg();
+ Offset = 0;
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(PtrBase);
+ BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
+ .addImm(Offset)
+ .addImm(IsGDS ? -1 : 0)
+ .cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
return selectDSGWSIntrinsic(I, IntrinsicID);
+ case Intrinsic::amdgcn_ds_append:
+ return selectDSAppendConsume(I, true);
+ case Intrinsic::amdgcn_ds_consume:
+ return selectDSAppendConsume(I, false);
default:
return selectImpl(I, *CoverageInfo);
}
}}};
}
-bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
+bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
int64_t Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
// On Southern Islands instruction with a negative base value and an offset
// don't seem to work.
- return KnownBits->signBitIsZero(Base.getReg());
+ return KnownBits->signBitIsZero(Base);
}
InstructionSelector::ComplexRendererFns
}};
}
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
- if (!RootDef) {
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
- }};
- }
+ if (!RootDef)
+ return std::make_pair(Root.getReg(), 0);
int64_t ConstAddr = 0;
if (isBaseWithConstantOffset(Root, *MRI)) {
if (LHSDef && RHSDef) {
int64_t PossibleOffset =
RHSDef->getOperand(1).getCImm()->getSExtValue();
- if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
+ if (isDSOffsetLegal(LHS.getReg(), PossibleOffset, 16)) {
// (add n0, c0)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
- }};
+ return std::make_pair(LHS.getReg(), PossibleOffset);
}
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
-
+ // TODO
} else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
-
+ // TODO
}
+ return std::make_pair(Root.getReg(), 0);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+
+ Register Reg;
+ unsigned Offset;
+ std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
}};
}
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
+ bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
- bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
- int64_t Offset, unsigned OffsetBits) const;
+ bool isDSOffsetLegal(Register Base, int64_t Offset,
+ unsigned OffsetBits) const;
+
+ std::pair<Register, unsigned>
+ selectDS1Addr1OffsetImpl(MachineOperand &Src) const;
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
constrainOpWithReadfirstlane(MI, MRI, 1); // M0
return;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// FIXME: Should this use a waterfall loop?
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
- case Intrinsic::amdgcn_ds_append:
- case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_exp_compr:
OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers.
--- /dev/null
+; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
--- /dev/null
+; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll
; CHECK-LABEL: name: ds_append_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY1]](p3), 0
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
liveins: $vgpr0
; CHECK-LABEL: name: ds_append_v
; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
; CHECK-LABEL: name: ds_consume_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY1]](p3), 0
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
liveins: $vgpr0
; CHECK-LABEL: name: ds_consume_v
; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s
; GCN-LABEL: {{^}}ds_append_lds:
; GCN: s_load_dword [[PTR:s[0-9]+]]
; GCN-LABEL: {{^}}ds_append_lds_over_max_offset:
; GCN: s_load_dword [[PTR:s[0-9]+]]
-; SI: s_bitset1_b32 [[PTR]], 16
-; CIPLUS: s_add_i32 [[PTR]], [[PTR]], 0x10000
+; SI-SDAG: s_bitset1_b32 [[PTR]], 16
+; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000
+; GCN-SDAG: s_mov_b32 m0, [[PTR]]
+
+; SI-GISEL: s_bitset1_b32 m0, 16
+; CIPLUS-GISEL: s_add_u32 m0, [[PTR]], 0x10000
-; GCN: s_mov_b32 m0, [[PTR]]
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
}
; GCN-LABEL: {{^}}ds_append_lds_vgpr_addr:
-; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN: s_mov_b32 m0, [[READLANE]]
+; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
+; GCN-SDAG: s_mov_b32 m0, [[READLANE]]
+
+; GCN-GISEL: v_readfirstlane_b32 m0, v0
+
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
ret void
}
-declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1) #1
-declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1) #1
+declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1
+declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly convergent nounwind }
ret void
}
-declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1) #1
-declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1) #1
+declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1
+declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly convergent nounwind }