//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
MemoryDependenceResults *MDR;
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
- bool isKernelFunc;
+ bool isEntryFunc;
public:
static char ID;
auto isGlobalLoad = [&](LoadInst &Load)->bool {
return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
};
- // We're tracking up to the Function boundaries
- // We cannot go beyond because of FunctionPass restrictions
- // Thus we can ensure that memory not clobbered for memory
- // operations that live in kernel only.
- bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
+ // We're tracking up to the Function boundaries, and cannot go beyond because
+ // of FunctionPass restrictions. We can ensure that memory is not clobbered
+ // only for memory operations that live in entry points.
+ bool NotClobbered = isEntryFunc && !isClobberedInFunction(&I);
Instruction *PtrI = dyn_cast<Instruction>(Ptr);
if (!PtrI && NotClobbered && isGlobalLoad(I)) {
if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
DA = &getAnalysis<LegacyDivergenceAnalysis>();
MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
visit(F);
noClobberClones.clear();
; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
- %val = load float, float addrspace(1)* %ptr
+ %val = load volatile float, float addrspace(1)* %ptr
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
%gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
- %val = load float, float addrspace(1)* %gep1
+ %val = load volatile float, float addrspace(1)* %gep1
ret float %val
}
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
%gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
- %val = load float, float addrspace(1)* %gep1
+ %val = load volatile float, float addrspace(1)* %gep1
ret float %val
}
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ; return to shader part epilog
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
- %val = load float, float addrspace(1)* %gep
+ %val = load volatile float, float addrspace(1)* %gep
ret float %val
}
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
%gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
- %val = load float, float addrspace(1)* %gep1
+ %val = load volatile float, float addrspace(1)* %gep1
ret float %val
}
define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
; GFX7-NEXT: ; return to shader part epilog
%gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
%gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
- %val = load float, float addrspace(1)* %gep1
+ %val = load volatile float, float addrspace(1)* %gep1
ret float %val
}