func_name == 'CreateGEP' or
func_name == 'CreateLoad' or
func_name == 'CreateMaskedLoad' or
+ func_name == 'CreateStore' or
+ func_name == 'CreateMaskedStore' or
func_name == 'CreateElementUnorderedAtomicMemCpy'):
ignore = True
pContext->pHotTileMgr = new HotTileMgr();
// initialize callback functions
- pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
- pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
- pContext->pfnClearTile = pCreateInfo->pfnClearTile;
- pContext->pfnMakeGfxPtr = pCreateInfo->pfnMakeGfxPtr;
- pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
- pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
- pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
+ pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
+ pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
+ pContext->pfnClearTile = pCreateInfo->pfnClearTile;
+ pContext->pfnTranslateGfxptrForRead = pCreateInfo->pfnTranslateGfxptrForRead;
+ pContext->pfnTranslateGfxptrForWrite = pCreateInfo->pfnTranslateGfxptrForWrite;
+ pContext->pfnMakeGfxPtr = pCreateInfo->pfnMakeGfxPtr;
+ pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
+ pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
+ pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
// pass pointer to bucket manager back to caller
uint32_t renderTargetArrayIndex,
const float* pClearColor);
+
+typedef void* (SWR_API* PFN_TRANSLATE_GFXPTR_FOR_READ)(HANDLE hPrivateContext,
+ gfxptr_t xpAddr,
+ bool* pbNullTileAccessed);
+
+typedef void* (SWR_API* PFN_TRANSLATE_GFXPTR_FOR_WRITE)(HANDLE hPrivateContext,
+ gfxptr_t xpAddr,
+ bool* pbNullTileAccessed);
+
typedef gfxptr_t(SWR_API* PFN_MAKE_GFXPTR)(HANDLE hPrivateContext,
void* sysAddr);
SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
// Callback functions
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_MAKE_GFXPTR pfnMakeGfxPtr;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
+ PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
+ PFN_MAKE_GFXPTR pfnMakeGfxPtr;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Pointer to rdtsc buckets mgr returned to the caller.
HotTileMgr* pHotTileMgr;
// Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_MAKE_GFXPTR pfnMakeGfxPtr;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_TRANSLATE_GFXPTR_FOR_READ pfnTranslateGfxptrForRead;
+ PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
+ PFN_MAKE_GFXPTR pfnMakeGfxPtr;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
// Call SOS
SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr,
"Trying to execute uninitialized streamout jit function.");
- state.pfnSoFunc[streamIndex](soContext);
+ state.pfnSoFunc[streamIndex](GetPrivateState(pDC), soContext);
}
// Update SO write offset. The driver provides memory for the update.
{
if (state.soBuffer[i].pWriteOffset)
{
- *state.soBuffer[i].pWriteOffset = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
+ bool nullTileAccessed = false;
+ void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
+ *((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
}
if (state.soBuffer[i].soWriteEnable)
struct SWR_STREAMOUT_BUFFER
{
// Pointers to streamout buffers.
- uint32_t* pBuffer;
+ gfxptr_t pBuffer;
// Offset to the SO write offset. If not null then we update offset here.
- uint32_t* pWriteOffset;
+ gfxptr_t pWriteOffset;
bool enable;
bool soWriteEnable;
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
-typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
+typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
+ StoreInst* BuilderGfxMem::STORE(Value *Val, Value *Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
+ {
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::STORE(Val, Ptr, isVolatile, Ty, usage);
+ }
+
+ StoreInst* BuilderGfxMem::STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty, JIT_MEM_CLIENT usage)
+ {
+ AssertGFXMemoryParams(BasePtr, usage);
+
+ BasePtr = TranslationHelper(BasePtr, Ty);
+ return Builder::STORE(Val, BasePtr, offset, Ty, usage);
+ }
+
+ CallInst* BuilderGfxMem::MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty, JIT_MEM_CLIENT usage)
+ {
+ AssertGFXMemoryParams(Ptr, usage);
+
+ Ptr = TranslationHelper(Ptr, Ty);
+ return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage);
+ }
+
Value* BuilderGfxMem::TranslateGfxAddressForRead(Value* xpGfxAddress,
Type* PtrTy,
const Twine& Name,
Type* Ty = nullptr,
JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+ virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+ virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
virtual Value* GATHERPS(Value* src,
Value* pBase,
Value* indices,
}
StoreInst*
- Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
GFX_MEM_CLIENT_FETCH,
GFX_MEM_CLIENT_SAMPLER,
GFX_MEM_CLIENT_SHADER,
+ GFX_MEM_CLIENT_STREAMOUT
};
protected:
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-LoadInst*
- LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
-StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
+virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+ return IRB()->CreateStore(Val, Ptr, isVolatile);
+}
+
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+ return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
+}
+
+LoadInst* LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
Value* MEM_ADD(Value* i32Incr,
*
******************************************************************************/
#include "jit_pch.hpp"
-#include "builder.h"
+#include "builder_gfx_mem.h"
#include "jit_api.h"
#include "streamout_jit.h"
#include "gen_state_llvm.h"
//////////////////////////////////////////////////////////////////////////
/// Interface to Jitting a fetch shader
//////////////////////////////////////////////////////////////////////////
-struct StreamOutJit : public Builder
+struct StreamOutJit : public BuilderGfxMem
{
- StreamOutJit(JitManager* pJitMgr) : Builder(pJitMgr){};
+ StreamOutJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr){};
// returns pointer to SWR_STREAMOUT_BUFFER
Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
// cast mask to <4xi1>
Value* mask = ToMask(packedMask);
- MASKED_STORE(src, pOut, 4, mask);
+ MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
}
// increment SO buffer
Value* pBuf = getSOBuffer(pSoCtx, b);
Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
- pOutBuffer[b] = GEP(pData, streamOffset);
+ pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0));
pOutBufferStartVertex[b] = pOutBuffer[b];
outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
fnName << ComputeCRC(0, &state, sizeof(state));
// SO function signature
- // typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
+ // typedef void(__cdecl *PFN_SO_FUNC)(SimDrawContext, SWR_STREAMOUT_CONTEXT*)
+
+ Type* typeParam0;
+ typeParam0 = mInt8PtrTy;
std::vector<Type*> args{
+ typeParam0,
PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
};
// arguments
auto argitr = soFunc->arg_begin();
+
+ Value* privateContext = &*argitr++;
+ privateContext->setName("privateContext");
+ SetPrivateContext(privateContext);
+
Value* pSoCtx = &*argitr++;
pSoCtx->setName("pSoCtx");
continue;
buffer.enable = true;
buffer.pBuffer =
- (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
+ (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
ctx->so_targets[i]->buffer_offset);
buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
buffer.pitch = stream_output->stride[i];