From: Jay Foad Date: Mon, 14 Feb 2022 17:50:21 +0000 (+0000) Subject: [AMDGPU] Honor !invariant.load metadata on load-like intrinsics X-Git-Tag: upstream/15.0.7~16456 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f72d8897acfcc3c8476d70353e2c4f91615ff4f0;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Honor !invariant.load metadata on load-like intrinsics Differential Revision: https://reviews.llvm.org/D119739 --- diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ea73fea..0a02f64 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1120,6 +1120,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, MachineFunction &MF, unsigned IntrID) const { + Info.flags = MachineMemOperand::MONone; + if (CI.hasMetadata(LLVMContext::MD_invariant_load)) + Info.flags |= MachineMemOperand::MOInvariant; + if (const AMDGPU::RsrcIntrinsic *RsrcIntr = AMDGPU::lookupRsrcIntrinsic(IntrID)) { AttributeList Attr = Intrinsic::getAttributes(CI.getContext(), @@ -1138,7 +1142,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()); } - Info.flags = MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MODereferenceable; if (Attr.hasFnAttr(Attribute::ReadOnly)) { unsigned DMaskLanes = 4; @@ -1180,9 +1184,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; // XXX - Should this be volatile without known ordering? 
Info.flags |= MachineMemOperand::MOVolatile; @@ -1202,7 +1206,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4)); if (!Vol->isZero()) @@ -1218,7 +1222,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getBufferPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4)); if (!Vol || !Vol->isZero()) @@ -1232,7 +1236,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1)); if (!Vol->isZero()) @@ -1245,9 +1249,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MOVolatile; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; return true; } case Intrinsic::amdgcn_image_bvh_intersect_ray: { @@ -1257,8 +1261,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = MFI->getImagePSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo()); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MODereferenceable; + Info.flags |= MachineMemOperand::MOLoad | + 
MachineMemOperand::MODereferenceable; return true; } case Intrinsic::amdgcn_global_atomic_fadd: @@ -1271,10 +1275,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align.reset(); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; + Info.flags |= MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; return true; } case Intrinsic::amdgcn_ds_gws_init: @@ -1294,9 +1298,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.size = 4; Info.align = Align(4); - Info.flags = MachineMemOperand::MOStore; if (IntrID == Intrinsic::amdgcn_ds_gws_barrier) - Info.flags = MachineMemOperand::MOLoad; + Info.flags |= MachineMemOperand::MOLoad; + else + Info.flags |= MachineMemOperand::MOStore; return true; } default: diff --git a/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll b/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll index a315735..3b2a4c5 100644 --- a/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll +++ b/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll @@ -8,13 +8,13 @@ define amdgpu_ps void @test(<8 x i32> inreg %load, <8 x i32> inreg %store) { ; GFX9-LABEL: test: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm -; GFX9-NEXT: v_mov_b32_e32 v0, 1 -; GFX9-NEXT: image_load v1, v0, s[0:7] dmask:0x1 unorm -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: image_store v1, v0, s[8:15] dmask:0x1 unorm +; GFX9-NEXT: v_mov_b32_e32 v1, 1 +; GFX9-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm +; GFX9-NEXT: image_load v3, v1, s[0:7] dmask:0x1 unorm +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: image_store v2, v0, s[8:15] dmask:0x1 unorm +; GFX9-NEXT: 
s_waitcnt vmcnt(1) +; GFX9-NEXT: image_store v3, v1, s[8:15] dmask:0x1 unorm ; GFX9-NEXT: s_endpgm %data0 = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 1, i32 0, <8 x i32> %load, i32 0, i32 0), !invariant.load !0 call void @llvm.amdgcn.image.store.1d.f32.i32(float %data0, i32 1, i32 0, <8 x i32> %store, i32 0, i32 0)