swr/rast: Add translation support to streamout
author: Alok Hota <alok.hota@intel.com>
Fri, 14 Sep 2018 14:45:26 +0000 (09:45 -0500)
committer: Alok Hota <alok.hota@intel.com>
Fri, 15 Feb 2019 20:54:29 +0000 (14:54 -0600)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
12 files changed:
src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
src/gallium/drivers/swr/swr_state.cpp

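diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 485403a..97a08d2 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py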
@@ -162,6 +162,8 @@ def parse_ir_builder(input_file):
                         func_name == 'CreateGEP' or
                         func_name == 'CreateLoad' or
                         func_name == 'CreateMaskedLoad' or
+                                               func_name == 'CreateStore' or
+                        func_name == 'CreateMaskedStore' or
                         func_name == 'CreateElementUnorderedAtomicMemCpy'):
                         ignore = True
 
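diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 0423a4f..e260c95 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp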
@@ -193,13 +193,15 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
     pContext->pHotTileMgr = new HotTileMgr();
 
     // initialize callback functions
-    pContext->pfnLoadTile            = pCreateInfo->pfnLoadTile;
-    pContext->pfnStoreTile           = pCreateInfo->pfnStoreTile;
-    pContext->pfnClearTile           = pCreateInfo->pfnClearTile;
-    pContext->pfnMakeGfxPtr          = pCreateInfo->pfnMakeGfxPtr;
-    pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
-    pContext->pfnUpdateStats         = pCreateInfo->pfnUpdateStats;
-    pContext->pfnUpdateStatsFE       = pCreateInfo->pfnUpdateStatsFE;
+    pContext->pfnLoadTile                 = pCreateInfo->pfnLoadTile;
+    pContext->pfnStoreTile                = pCreateInfo->pfnStoreTile;
+    pContext->pfnClearTile                = pCreateInfo->pfnClearTile;
+    pContext->pfnTranslateGfxptrForRead   = pCreateInfo->pfnTranslateGfxptrForRead;
+    pContext->pfnTranslateGfxptrForWrite  = pCreateInfo->pfnTranslateGfxptrForWrite;
+    pContext->pfnMakeGfxPtr               = pCreateInfo->pfnMakeGfxPtr;
+    pContext->pfnUpdateSoWriteOffset      = pCreateInfo->pfnUpdateSoWriteOffset;
+    pContext->pfnUpdateStats              = pCreateInfo->pfnUpdateStats;
+    pContext->pfnUpdateStatsFE            = pCreateInfo->pfnUpdateStatsFE;
 
 
     // pass pointer to bucket manager back to caller
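diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h
index b33dc96..e31e044 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h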
@@ -145,6 +145,15 @@ typedef void(SWR_API* PFN_CLEAR_TILE)(HANDLE                      hPrivateContex
                                       uint32_t                    renderTargetArrayIndex,
                                       const float*                pClearColor);
 
+
+typedef void* (SWR_API* PFN_TRANSLATE_GFXPTR_FOR_READ)(HANDLE hPrivateContext, 
+                                                    gfxptr_t xpAddr, 
+                                                    bool* pbNullTileAccessed);
+
+typedef void* (SWR_API* PFN_TRANSLATE_GFXPTR_FOR_WRITE)(HANDLE hPrivateContext, 
+                                                        gfxptr_t xpAddr, 
+                                                        bool* pbNullTileAccessed);
+
 typedef gfxptr_t(SWR_API* PFN_MAKE_GFXPTR)(HANDLE                 hPrivateContext,
                                            void*                  sysAddr);
 
@@ -241,13 +250,15 @@ struct SWR_CREATECONTEXT_INFO
     SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
 
     // Callback functions
-    PFN_LOAD_TILE              pfnLoadTile;
-    PFN_STORE_TILE             pfnStoreTile;
-    PFN_CLEAR_TILE             pfnClearTile;
-    PFN_MAKE_GFXPTR            pfnMakeGfxPtr;
-    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
-    PFN_UPDATE_STATS           pfnUpdateStats;
-    PFN_UPDATE_STATS_FE        pfnUpdateStatsFE;
+    PFN_LOAD_TILE                   pfnLoadTile;
+    PFN_STORE_TILE                  pfnStoreTile;
+    PFN_CLEAR_TILE                  pfnClearTile;
+    PFN_TRANSLATE_GFXPTR_FOR_READ   pfnTranslateGfxptrForRead;
+    PFN_TRANSLATE_GFXPTR_FOR_WRITE  pfnTranslateGfxptrForWrite;
+    PFN_MAKE_GFXPTR                 pfnMakeGfxPtr;
+    PFN_UPDATE_SO_WRITE_OFFSET      pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS                pfnUpdateStats;
+    PFN_UPDATE_STATS_FE             pfnUpdateStatsFE;
 
 
     // Pointer to rdtsc buckets mgr returned to the caller.
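diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h
index 393d42b..efbddb0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h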
@@ -525,13 +525,15 @@ struct SWR_CONTEXT
     HotTileMgr* pHotTileMgr;
 
     // Callback functions, passed in at create context time
-    PFN_LOAD_TILE              pfnLoadTile;
-    PFN_STORE_TILE             pfnStoreTile;
-    PFN_CLEAR_TILE             pfnClearTile;
-    PFN_MAKE_GFXPTR            pfnMakeGfxPtr;
-    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
-    PFN_UPDATE_STATS           pfnUpdateStats;
-    PFN_UPDATE_STATS_FE        pfnUpdateStatsFE;
+    PFN_LOAD_TILE                   pfnLoadTile;
+    PFN_STORE_TILE                  pfnStoreTile;
+    PFN_CLEAR_TILE                  pfnClearTile;
+    PFN_TRANSLATE_GFXPTR_FOR_READ   pfnTranslateGfxptrForRead;
+    PFN_TRANSLATE_GFXPTR_FOR_WRITE  pfnTranslateGfxptrForWrite;
+    PFN_MAKE_GFXPTR                 pfnMakeGfxPtr;
+    PFN_UPDATE_SO_WRITE_OFFSET      pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS                pfnUpdateStats;
+    PFN_UPDATE_STATS_FE             pfnUpdateStatsFE;
 
 
     // Global Stats
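diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 6ba6784..85b2e8d 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp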
@@ -575,7 +575,7 @@ static void StreamOut(
         // Call SOS
         SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr,
                    "Trying to execute uninitialized streamout jit function.");
-        state.pfnSoFunc[streamIndex](soContext);
+        state.pfnSoFunc[streamIndex](GetPrivateState(pDC), soContext);
     }
 
     // Update SO write offset. The driver provides memory for the update.
@@ -583,7 +583,9 @@ static void StreamOut(
     {
         if (state.soBuffer[i].pWriteOffset)
         {
-            *state.soBuffer[i].pWriteOffset = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
+            bool nullTileAccessed = false;
+            void* pWriteOffset = pDC->pContext->pfnTranslateGfxptrForWrite(GetPrivateState(pDC), soContext.pBuffer[i]->pWriteOffset, &nullTileAccessed);
+            *((uint32_t*)pWriteOffset) = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t);
         }
 
         if (state.soBuffer[i].soWriteEnable)
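diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index c2594b0..04fad69 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h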
@@ -678,10 +678,10 @@ OSALIGNLINE(struct) SWR_STATS_FE
 struct SWR_STREAMOUT_BUFFER
 {
     // Pointers to streamout buffers.
-    uint32_t* pBuffer;
+    gfxptr_t pBuffer;
 
     // Offset to the SO write offset. If not null then we update offset here.
-    uint32_t* pWriteOffset;
+    gfxptr_t pWriteOffset;
 
     bool enable;
     bool soWriteEnable;
@@ -936,7 +936,7 @@ typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateDat
 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_DS_CONTEXT* pDsContext);
 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
-typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
+typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, SWR_STREAMOUT_CONTEXT& soContext);
 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
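diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
index 19eec7e..b3d0b70 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp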
@@ -237,6 +237,30 @@ namespace SwrJit
         return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
     }
 
+    StoreInst* BuilderGfxMem::STORE(Value *Val, Value *Ptr, bool isVolatile, Type* Ty, JIT_MEM_CLIENT usage)
+    {
+        AssertGFXMemoryParams(Ptr, usage);
+
+        Ptr = TranslationHelper(Ptr, Ty);
+        return Builder::STORE(Val, Ptr, isVolatile, Ty, usage);
+    }
+
+    StoreInst* BuilderGfxMem::STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty, JIT_MEM_CLIENT usage)
+    {
+        AssertGFXMemoryParams(BasePtr, usage);
+
+        BasePtr = TranslationHelper(BasePtr, Ty);
+        return Builder::STORE(Val, BasePtr, offset, Ty, usage);
+    }
+
+    CallInst* BuilderGfxMem::MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty, JIT_MEM_CLIENT usage)
+    {
+        AssertGFXMemoryParams(Ptr, usage);
+
+        Ptr = TranslationHelper(Ptr, Ty);
+        return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage);
+    }
+
     Value* BuilderGfxMem::TranslateGfxAddressForRead(Value*       xpGfxAddress,
                                                      Type*        PtrTy,
                                                      const Twine& Name,
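diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
index 4cf0625..1bbe86d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h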
@@ -76,6 +76,12 @@ namespace SwrJit
                                       Type*          Ty       = nullptr,
                                       JIT_MEM_CLIENT usage    = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
 
+        virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+        
+        virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+        virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
         virtual Value* GATHERPS(Value*         src,
                                 Value*         pBase,
                                 Value*         indices,
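diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index a9d6490..90a0e03 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp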
@@ -141,7 +141,7 @@ namespace SwrJit
     }
 
     StoreInst*
-    Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
+    Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices, Type* Ty, JIT_MEM_CLIENT usage)
     {
         std::vector<Value*> valIndices;
         for (auto i : indices)
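diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index c533984..ccf42c8 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h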
@@ -36,6 +36,7 @@ enum class JIT_MEM_CLIENT
     GFX_MEM_CLIENT_FETCH,
     GFX_MEM_CLIENT_SAMPLER,
     GFX_MEM_CLIENT_SHADER,
+    GFX_MEM_CLIENT_STREAMOUT
 };
 
 protected:
@@ -85,9 +86,19 @@ virtual CallInst* MASKED_LOAD(Value*         Ptr,
     return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
 }
 
-LoadInst*
-           LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
-StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
+virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+    return IRB()->CreateStore(Val, Ptr, isVolatile);
+}
+
+virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, JIT_MEM_CLIENT usage = JIT_MEM_CLIENT::MEM_CLIENT_INTERNAL)
+{
+    return IRB()->CreateMaskedStore(Val, Ptr, Align, Mask);
+}
+
+LoadInst*  LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
 StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
 
 Value* MEM_ADD(Value*                                 i32Incr,
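diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
index 11ad365..2e99bb7 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp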
@@ -28,7 +28,7 @@
  *
  ******************************************************************************/
 #include "jit_pch.hpp"
-#include "builder.h"
+#include "builder_gfx_mem.h"
 #include "jit_api.h"
 #include "streamout_jit.h"
 #include "gen_state_llvm.h"
@@ -40,9 +40,9 @@ using namespace SwrJit;
 //////////////////////////////////////////////////////////////////////////
 /// Interface to Jitting a fetch shader
 //////////////////////////////////////////////////////////////////////////
-struct StreamOutJit : public Builder
+struct StreamOutJit : public BuilderGfxMem
 {
-    StreamOutJit(JitManager* pJitMgr) : Builder(pJitMgr){};
+    StreamOutJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr){};
 
     // returns pointer to SWR_STREAMOUT_BUFFER
     Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
@@ -155,7 +155,7 @@ struct StreamOutJit : public Builder
 
             // cast mask to <4xi1>
             Value* mask = ToMask(packedMask);
-            MASKED_STORE(src, pOut, 4, mask);
+            MASKED_STORE(src, pOut, 4, mask, PointerType::get(simd4Ty, 0), JIT_MEM_CLIENT::GFX_MEM_CLIENT_STREAMOUT);
         }
 
         // increment SO buffer
@@ -223,7 +223,7 @@ struct StreamOutJit : public Builder
             Value* pBuf              = getSOBuffer(pSoCtx, b);
             Value* pData             = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
             Value* streamOffset      = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
-            pOutBuffer[b]            = GEP(pData, streamOffset);
+            pOutBuffer[b] = GEP(pData, streamOffset, PointerType::get(IRB()->getInt32Ty(), 0)); 
             pOutBufferStartVertex[b] = pOutBuffer[b];
 
             outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
@@ -264,9 +264,13 @@ struct StreamOutJit : public Builder
         fnName << ComputeCRC(0, &state, sizeof(state));
 
         // SO function signature
-        // typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT*)
+        // typedef void(__cdecl *PFN_SO_FUNC)(HANDLE hPrivateData, SWR_STREAMOUT_CONTEXT& soContext)
+
+        Type* typeParam0;
+        typeParam0 = mInt8PtrTy;
 
         std::vector<Type*> args{
+            typeParam0,
             PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
         };
 
@@ -284,6 +288,11 @@ struct StreamOutJit : public Builder
 
         // arguments
         auto   argitr = soFunc->arg_begin();
+
+        Value* privateContext = &*argitr++;
+        privateContext->setName("privateContext");
+        SetPrivateContext(privateContext);
+
         Value* pSoCtx = &*argitr++;
         pSoCtx->setName("pSoCtx");
 
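diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index e540ddb..d7baa71 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp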
@@ -1746,7 +1746,7 @@ swr_update_derived(struct pipe_context *pipe,
             continue;
          buffer.enable = true;
          buffer.pBuffer =
-            (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
+            (gfxptr_t)(swr_resource_data(ctx->so_targets[i]->buffer) +
                          ctx->so_targets[i]->buffer_offset);
          buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
          buffer.pitch = stream_output->stride[i];