From ec9628318d797bfe036aca314d58665dd93b364f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 16 Jan 2020 22:27:31 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Select DS append/consume --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 75 ++++++++++++++++------ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 9 ++- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 14 +++- .../AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll | 4 ++ .../AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll | 4 ++ .../GlobalISel/regbankselect-amdgcn.ds.append.mir | 8 +-- .../GlobalISel/regbankselect-amdgcn.ds.consume.mir | 8 +-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll | 28 ++++---- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll | 4 +- 9 files changed, 108 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5ab8d2d..a10c1ce 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1197,6 +1197,36 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, return true; } +bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI, + bool IsAppend) const { + Register PtrBase = MI.getOperand(2).getReg(); + LLT PtrTy = MRI->getType(PtrBase); + bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS; + + unsigned Offset; + std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2)); + + // TODO: Should this try to look through readfirstlane like GWS? + if (!isDSOffsetLegal(PtrBase, Offset, 16)) { + PtrBase = MI.getOperand(2).getReg(); + Offset = 0; + } + + MachineBasicBlock *MBB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; + + BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .addReg(PtrBase); + BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg()) + .addImm(Offset) + .addImm(IsGDS ? -1 : 0) + .cloneMemRefs(MI); + + MI.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -1230,6 +1260,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( case Intrinsic::amdgcn_ds_gws_sema_p: case Intrinsic::amdgcn_ds_gws_sema_release_all: return selectDSGWSIntrinsic(I, IntrinsicID); + case Intrinsic::amdgcn_ds_append: + return selectDSAppendConsume(I, true); + case Intrinsic::amdgcn_ds_consume: + return selectDSAppendConsume(I, false); default: return selectImpl(I, *CoverageInfo); } @@ -2248,8 +2282,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }}}; } -bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, - const MachineOperand &Base, +bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base, int64_t Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || @@ -2261,7 +2294,7 @@ bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, // On Southern Islands instruction with a negative base value and an offset // don't seem to work. - return KnownBits->signBitIsZero(Base.getReg()); + return KnownBits->signBitIsZero(Base); } InstructionSelector::ComplexRendererFns @@ -2292,15 +2325,11 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset( }}; } -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { +std::pair +AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const { const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); - if (!RootDef) { - return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } - }}; - } + if (!RootDef) + return std::make_pair(Root.getReg(), 0); int64_t ConstAddr = 0; if (isBaseWithConstantOffset(Root, *MRI)) { @@ -2311,26 +2340,32 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { if (LHSDef && RHSDef) { int64_t PossibleOffset = RHSDef->getOperand(1).getCImm()->getSExtValue(); - if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) { + if (isDSOffsetLegal(LHS.getReg(), PossibleOffset, 16)) { // (add n0, c0) - return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } - }}; + return std::make_pair(LHS.getReg(), PossibleOffset); } } } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { - + // TODO } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { - + // TODO } + return std::make_pair(Root.getReg(), 0); +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { + + Register Reg; + unsigned Offset; + std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root); return {{ - [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } }}; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 6f52686..89d5595 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -102,6 +102,7 @@ private: bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; + bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const; bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; @@ -164,9 +165,11 @@ private: InstructionSelector::ComplexRendererFns selectMUBUFScratchOffset(MachineOperand &Root) const; - bool isDSOffsetLegal(const MachineRegisterInfo &MRI, - const MachineOperand &Base, - int64_t Offset, unsigned OffsetBits) const; + bool isDSOffsetLegal(Register Base, int64_t Offset, + unsigned OffsetBits) const; + + std::pair + selectDS1Addr1OffsetImpl(MachineOperand &Src) const; InstructionSelector::ComplexRendererFns selectDS1Addr1Offset(MachineOperand &Root) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ebe15c9..48f49c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2147,6 +2147,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 1); // M0 return; } + case Intrinsic::amdgcn_ds_append: + case Intrinsic::amdgcn_ds_consume: { + constrainOpWithReadfirstlane(MI, MRI, 2); // M0 + return; + } case Intrinsic::amdgcn_s_sendmsg: case Intrinsic::amdgcn_s_sendmsghalt: { // FIXME: Should this use a waterfall loop? @@ -3080,8 +3085,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } - case Intrinsic::amdgcn_ds_append: - case Intrinsic::amdgcn_ds_consume: case Intrinsic::amdgcn_ds_fadd: case Intrinsic::amdgcn_ds_fmin: case Intrinsic::amdgcn_ds_fmax: @@ -3098,6 +3101,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); break; } + case Intrinsic::amdgcn_ds_append: + case Intrinsic::amdgcn_ds_consume: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + break; + } case Intrinsic::amdgcn_exp_compr: OpdsMapping[0] = nullptr; // IntrinsicID // FIXME: These are immediate values which can't be read from registers. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll new file mode 100644 index 0000000..8287a60 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll @@ -0,0 +1,4 @@ +; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll new file mode 100644 index 0000000..c755c37 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll @@ -0,0 +1,4 @@ +; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir index 0267f76..51215fb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir @@ -12,8 +12,7 @@ body: | ; CHECK-LABEL: name: ds_append_s ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY1]](p3), 0 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 @@ -28,8 +27,9 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: ds_append_v ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir index 50dd920..c3cc88e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir @@ -12,8 +12,7 @@ body: | ; CHECK-LABEL: name: ds_consume_s ; CHECK: liveins: $sgpr0 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY1]](p3), 0 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0 %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 @@ -28,8 +27,9 @@ body: | liveins: $vgpr0 ; CHECK-LABEL: name: ds_consume_v ; CHECK: liveins: $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 - ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0 %0:_(p3) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll index 9f7aa5e..ce1551e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s ; GCN-LABEL: {{^}}ds_append_lds: ; GCN: s_load_dword [[PTR:s[0-9]+]] @@ -51,10 +51,13 @@ define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspa ; GCN-LABEL: {{^}}ds_append_lds_over_max_offset: ; GCN: s_load_dword [[PTR:s[0-9]+]] -; SI: s_bitset1_b32 [[PTR]], 16 -; CIPLUS: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; SI-SDAG: s_bitset1_b32 [[PTR]], 16 +; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; GCN-SDAG: s_mov_b32 m0, [[PTR]] + +; SI-GISEL: s_bitset1_b32 m0, 16 +; CIPLUS-GISEL: s_add_u32 m0, [[PTR]], 0x10000 -; GCN: s_mov_b32 m0, [[PTR]] ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] @@ -66,8 +69,11 @@ define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds, } ; GCN-LABEL: {{^}}ds_append_lds_vgpr_addr: -; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; GCN: s_mov_b32 m0, [[READLANE]] +; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 +; GCN-SDAG: s_mov_b32 m0, [[READLANE]] + +; GCN-GISEL: v_readfirstlane_b32 m0, v0 + ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] @@ -127,8 +133,8 @@ define amdgpu_kernel void @ds_append_lds_m0_restore(i32 addrspace(3)* %lds, i32 ret void } -declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1) #1 -declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1) #1 +declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly convergent nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll index 415b80a..175c0cf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll @@ -127,8 +127,8 @@ define amdgpu_kernel void @ds_consume_lds_m0_restore(i32 addrspace(3)* %lds, i32 ret void } -declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1) #1 -declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1) #1 +declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly convergent nounwind } -- 2.7.4