rasterizer/swr: move BucketMgr to SwrContext
author Jan Zielinski <jan.zielinski@intel.com>
Wed, 24 Jul 2019 10:03:49 +0000 (12:03 +0200)
committer Jan Zielinski <jan.zielinski@intel.com>
Tue, 30 Jul 2019 13:39:18 +0000 (13:39 +0000)
This move gets us back to parity with the global bucket manager,
in that we can dump render context buckets again.

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
21 files changed:
src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
src/gallium/drivers/swr/rasterizer/core/backend_impl.h
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
src/gallium/drivers/swr/rasterizer/core/binner.cpp
src/gallium/drivers/swr/rasterizer/core/clip.cpp
src/gallium/drivers/swr/rasterizer/core/clip.h
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
src/gallium/drivers/swr/swr_context.cpp

index bbc9538..b00cbf6 100644 (file)
@@ -48,7 +48,17 @@ extern THREAD UINT tlsThreadId;
 class BucketManager
 {
 public:
-    BucketManager() {}
+
+    uint32_t mCurrentFrame;
+    std::vector<uint32_t> mBucketMap;
+    bool                  mBucketsInitialized;
+    std::string           mBucketMgrName;
+
+
+    BucketManager(std::string name) : mCurrentFrame(0), mBucketsInitialized(false), mBucketMgrName(name) 
+    {
+        mBucketMap.clear();
+    }
     ~BucketManager();
 
     // removes all registered thread data
index f9b86cf..a043a34 100644 (file)
@@ -65,9 +65,6 @@ void WakeAllThreads(SWR_CONTEXT* pContext)
 /// @param pCreateInfo - pointer to creation info.
 HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
 {
-    RDTSC_RESET();
-    RDTSC_INIT(0);
-
     void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
     memset(pContextMem, 0, sizeof(SWR_CONTEXT));
     SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT();
@@ -157,6 +154,12 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
         ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
 #endif
 
+#if defined(KNOB_ENABLE_RDTSC)
+    pContext->pBucketMgr = new BucketManager(pCreateInfo->contextName);
+    RDTSC_RESET(pContext->pBucketMgr);
+    RDTSC_INIT(pContext->pBucketMgr, 0);
+#endif
+
     // Allocate scratch space for workers.
     ///@note We could lazily allocate this but its rather small amount of memory.
     for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
@@ -205,7 +208,7 @@ HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
 
     // pass pointer to bucket manager back to caller
 #ifdef KNOB_ENABLE_RDTSC
-    pCreateInfo->pBucketMgr = &gBucketMgr;
+    pCreateInfo->pBucketMgr = pContext->pBucketMgr;
 #endif
 
     pCreateInfo->contextSaveSize = sizeof(API_STATE);
@@ -277,9 +280,9 @@ void QueueWork(SWR_CONTEXT* pContext)
     }
     else
     {
-        RDTSC_BEGIN(APIDrawWakeAllThreads, pDC->drawId);
+        RDTSC_BEGIN(pContext->pBucketMgr, APIDrawWakeAllThreads, pDC->drawId);
         WakeAllThreads(pContext);
-        RDTSC_END(APIDrawWakeAllThreads, 1);
+        RDTSC_END(pContext->pBucketMgr, APIDrawWakeAllThreads, 1);
     }
 
     // Set current draw context to NULL so that next state call forces a new draw context to be
@@ -300,7 +303,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext)
 
 DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
 {
-    RDTSC_BEGIN(APIGetDrawContext, 0);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIGetDrawContext, 0);
     // If current draw context is null then need to obtain a new draw context to use from ring.
     if (pContext->pCurDrawContext == nullptr)
     {
@@ -389,7 +392,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
         SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC");
     }
 
-    RDTSC_END(APIGetDrawContext, 0);
+    RDTSC_END(pContext->pBucketMgr, APIGetDrawContext, 0);
     return pContext->pCurDrawContext;
 }
 
@@ -441,6 +444,10 @@ void SwrDestroyContext(HANDLE hContext)
 #endif
     }
 
+#if defined(KNOB_ENABLE_RDTSC)
+    delete pContext->pBucketMgr;
+#endif
+
     delete[] pContext->ppScratch;
     AlignedFree(pContext->pStats);
 
@@ -498,7 +505,7 @@ void SWR_API SwrSync(HANDLE            hContext,
     SWR_CONTEXT*  pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
 
-    RDTSC_BEGIN(APISync, 0);
+    RDTSC_BEGIN(pContext->pBucketMgr, APISync, 0);
 
     pDC->FeWork.type    = SYNC;
     pDC->FeWork.pfnWork = ProcessSync;
@@ -514,7 +521,7 @@ void SWR_API SwrSync(HANDLE            hContext,
     // enqueue
     QueueDraw(pContext);
 
-    RDTSC_END(APISync, 1);
+    RDTSC_END(pContext->pBucketMgr, APISync, 1);
 }
 
 void SwrStallBE(HANDLE hContext)
@@ -529,28 +536,28 @@ void SwrWaitForIdle(HANDLE hContext)
 {
     SWR_CONTEXT* pContext = GetContext(hContext);
 
-    RDTSC_BEGIN(APIWaitForIdle, 0);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
 
     while (!pContext->dcRing.IsEmpty())
     {
         _mm_pause();
     }
 
-    RDTSC_END(APIWaitForIdle, 1);
+    RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
 }
 
 void SwrWaitForIdleFE(HANDLE hContext)
 {
     SWR_CONTEXT* pContext = GetContext(hContext);
 
-    RDTSC_BEGIN(APIWaitForIdle, 0);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIWaitForIdle, 0);
 
     while (pContext->drawsOutstandingFE > 0)
     {
         _mm_pause();
     }
 
-    RDTSC_END(APIWaitForIdle, 1);
+    RDTSC_END(pContext->pBucketMgr, APIWaitForIdle, 1);
 }
 
 void SwrSetVertexBuffers(HANDLE                         hContext,
@@ -1172,7 +1179,7 @@ void DrawInstanced(HANDLE             hContext,
     SWR_CONTEXT*  pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
 
-    RDTSC_BEGIN(APIDraw, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIDraw, pDC->drawId);
 
     uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
     uint32_t primsPerDraw    = GetNumPrims(topology, maxVertsPerDraw);
@@ -1243,7 +1250,7 @@ void DrawInstanced(HANDLE             hContext,
     pDC                                   = GetDrawContext(pContext);
     pDC->pState->state.rastState.cullMode = oldCullMode;
 
-    RDTSC_END(APIDraw, numVertices * numInstances);
+    RDTSC_END(pContext->pBucketMgr, APIDraw, numVertices * numInstances);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1307,7 +1314,7 @@ void DrawIndexedInstance(HANDLE             hContext,
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
     API_STATE*    pState   = &pDC->pState->state;
 
-    RDTSC_BEGIN(APIDrawIndexed, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIDrawIndexed, pDC->drawId);
 
     uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
     uint32_t primsPerDraw      = GetNumPrims(topology, maxIndicesPerDraw);
@@ -1403,7 +1410,7 @@ void DrawIndexedInstance(HANDLE             hContext,
     pDC                                   = GetDrawContext(pContext);
     pDC->pState->state.rastState.cullMode = oldCullMode;
 
-    RDTSC_END(APIDrawIndexed, numIndices * numInstances);
+    RDTSC_END(pContext->pBucketMgr, APIDrawIndexed, numIndices * numInstances);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1529,7 +1536,7 @@ void SwrDispatch(HANDLE   hContext,
     SWR_CONTEXT*  pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
 
-    RDTSC_BEGIN(APIDispatch, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIDispatch, pDC->drawId);
     AR_API_EVENT(
         DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
     pDC->isCompute = true; // This is a compute context.
@@ -1546,7 +1553,9 @@ void SwrDispatch(HANDLE   hContext,
     pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
 
     QueueDispatch(pContext);
-    RDTSC_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ);
+    RDTSC_END(pContext->pBucketMgr,
+              APIDispatch,
+              threadGroupCountX * threadGroupCountY * threadGroupCountZ);
 }
 
 // Deswizzles, converts and stores current contents of the hot tiles to surface
@@ -1564,7 +1573,7 @@ void SWR_API SwrStoreTiles(HANDLE          hContext,
     SWR_CONTEXT*  pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
 
-    RDTSC_BEGIN(APIStoreTiles, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIStoreTiles, pDC->drawId);
 
     pDC->FeWork.type                               = STORETILES;
     pDC->FeWork.pfnWork                            = ProcessStoreTiles;
@@ -1578,7 +1587,7 @@ void SWR_API SwrStoreTiles(HANDLE          hContext,
 
     AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
 
-    RDTSC_END(APIStoreTiles, 1);
+    RDTSC_END(pContext->pBucketMgr, APIStoreTiles, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1606,7 +1615,7 @@ void SWR_API SwrClearRenderTarget(HANDLE          hContext,
     SWR_CONTEXT*  pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
 
-    RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, APIClearRenderTarget, pDC->drawId);
 
     pDC->FeWork.type            = CLEAR;
     pDC->FeWork.pfnWork         = ProcessClear;
@@ -1624,7 +1633,7 @@ void SWR_API SwrClearRenderTarget(HANDLE          hContext,
     // enqueue draw
     QueueDraw(pContext);
 
-    RDTSC_END(APIClearRenderTarget, 1);
+    RDTSC_END(pContext->pBucketMgr, APIClearRenderTarget, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1697,7 +1706,7 @@ void SWR_API SwrEndFrame(HANDLE hContext)
     DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
     (void)pDC; // var used
 
-    RDTSC_ENDFRAME();
+    RDTSC_ENDFRAME(pContext->pBucketMgr);
     AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));
 
     pContext->frameCount++;
index c842859..a3f065d 100644 (file)
@@ -277,6 +277,8 @@ struct SWR_CREATECONTEXT_INFO
     // Input: if set to non-zero value, overrides KNOB value for maximum
     // number of draws in flight
     uint32_t MAX_DRAWS_IN_FLIGHT;
+
+    std::string contextName;
 };
 
 //////////////////////////////////////////////////////////////////////////
index ad358bc..a435fa3 100644 (file)
@@ -52,7 +52,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
 {
     SWR_CONTEXT* pContext = pDC->pContext;
 
-    RDTSC_BEGIN(BEDispatch, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEDispatch, pDC->drawId);
 
     const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
     SWR_ASSERT(pTaskData != nullptr);
@@ -90,7 +90,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
     UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
     AR_EVENT(CSStats((HANDLE)&csContext.stats));
 
-    RDTSC_END(BEDispatch, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, BEDispatch, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -119,7 +119,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT*               pDC,
     SWR_CONTEXT* pContext           = pDC->pContext;
     HANDLE       hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
 
-    RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStoreTiles, pDC->drawId);
 
     SWR_FORMAT srcFormat;
     switch (attachment)
@@ -194,7 +194,7 @@ void ProcessStoreTileBE(DRAW_CONTEXT*               pDC,
             }
         }
     }
-    RDTSC_END(BEStoreTiles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, BEStoreTiles, 1);
 }
 
 void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
@@ -247,9 +247,9 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
                    SWR_TRIANGLE_DESC&   work,
                    RenderOutputBuffers& renderBuffers)
 {
-    RDTSC_BEGIN(BENullBackend, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BENullBackend, pDC->drawId);
     ///@todo: handle center multisample pattern
-    RDTSC_BEGIN(BESetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
 
     const API_STATE& state = GetApiState(pDC);
 
@@ -262,7 +262,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
     SWR_PS_CONTEXT psContext;
     // skip SetupPixelShaderContext(&psContext, ...); // not needed here
 
-    RDTSC_END(BESetup, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
 
     simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
 
@@ -305,7 +305,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
                         coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
                     }
 
-                    RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
 
                     // calculate per sample positions
                     psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
@@ -321,7 +321,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
                                             psContext.vJ.sample);
                     psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
 
-                    RDTSC_END(BEBarycentric, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
 
                     // interpolate user clip distance if available
                     if (state.backendState.clipDistanceMask)
@@ -335,7 +335,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
                     simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
                     simdscalar stencilPassMask = vCoverageMask;
 
-                    RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
                     simdscalar depthPassMask = DepthStencilTest(&state,
                                                                 work.triFlags.frontFacing,
                                                                 work.triFlags.viewportIndex,
@@ -356,7 +356,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
                                       vCoverageMask,
                                       pStencilSample,
                                       stencilPassMask);
-                    RDTSC_END(BEEarlyDepthTest, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
 
                     uint32_t statMask  = _simd_movemask_ps(depthPassMask);
                     uint32_t statCount = _mm_popcnt_u32(statMask);
@@ -378,7 +378,7 @@ void BackendNullPS(DRAW_CONTEXT*        pDC,
         vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy);
     }
 
-    RDTSC_END(BENullBackend, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BENullBackend, 0);
 }
 
 PFN_CLEAR_TILES  gClearTilesTable[NUM_SWR_FORMATS] = {};
index 5750cea..e772306 100644 (file)
@@ -168,7 +168,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
 
         SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
 
-        RDTSC_BEGIN(BEClear, pDC->drawId);
+        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
 
         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
         {
@@ -226,13 +226,13 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
             pHotTile->state        = HOTTILE_CLEAR;
         }
 
-        RDTSC_END(BEClear, 1);
+        RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
     }
     else
     {
         // Legacy clear
         CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
-        RDTSC_BEGIN(BEClear, pDC->drawId);
+        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEClear, pDC->drawId);
 
         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
         {
@@ -292,7 +292,7 @@ void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, vo
                           pClear->rect);
         }
 
-        RDTSC_END(BEClear, 1);
+        RDTSC_END(pDC->pContext->pBucketMgr, BEClear, 1);
     }
 }
 
index 83d662b..ad6b78a 100644 (file)
@@ -755,7 +755,7 @@ struct PixelRateZTestLoop
                                  _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
             }
 
-            RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+            RDTSC_BEGIN(psContext.pBucketManager, BEBarycentric, pDC->drawId);
 
             // calculate per sample positions
             psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
@@ -778,7 +778,7 @@ struct PixelRateZTestLoop
                 vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
             }
 
-            RDTSC_END(BEBarycentric, 0);
+            RDTSC_END(psContext.pBucketManager, BEBarycentric, 0);
 
             ///@todo: perspective correct vs non-perspective correct clipping?
             // if clip distances are enabled, we need to interpolate for each sample
@@ -795,7 +795,7 @@ struct PixelRateZTestLoop
 
             // ZTest for this sample
             ///@todo Need to uncomment out this bucket.
-            // RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
+            // RDTSC_BEGIN(psContext.pBucketManager, BEDepthBucket, pDC->drawId);
             depthPassMask[sample]   = vCoverageMask[sample];
             stencilPassMask[sample] = vCoverageMask[sample];
             depthPassMask[sample]   = DepthStencilTest(&state,
@@ -806,7 +806,7 @@ struct PixelRateZTestLoop
                                                      vCoverageMask[sample],
                                                      pStencilSample,
                                                      &stencilPassMask[sample]);
-            // RDTSC_END(BEDepthBucket, 0);
+            // RDTSC_END(psContext.pBucketManager, BEDepthBucket, 0);
 
             // early-exit if no pixels passed depth or earlyZ is forced on
             if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
@@ -1007,8 +1007,8 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
     /// backend
 
 
-    RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId);
-    RDTSC_BEGIN(BESetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelRateBackend, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
 
     const API_STATE& state = GetApiState(pDC);
 
@@ -1029,7 +1029,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
-    RDTSC_END(BESetup, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
 
     PixelRateZTestLoop<T> PixelRateZTest(pDC,
                                          workerId,
@@ -1075,14 +1075,14 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                     pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
             }
 
-            RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
 
             CalcPixelBarycentrics(coeffs, psContext);
 
             CalcCentroid<T, false>(
                 &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
 
-            RDTSC_END(BEBarycentric, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
 
             if (T::bForcedSampleCount)
             {
@@ -1109,12 +1109,12 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
 
             if (state.psState.usesSourceDepth)
             {
-                RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
                 // interpolate and quantize z
                 psContext.vZ = vplaneps(
                     coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
                 psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
-                RDTSC_END(BEBarycentric, 0);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
             }
 
             // pixels that are currently active
@@ -1122,10 +1122,10 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
             psContext.oMask      = T::MultisampleT::FullSampleMask();
 
             // execute pixel shader
-            RDTSC_BEGIN(BEPixelShader, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
             state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
             UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
-            RDTSC_END(BEPixelShader, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
 
             // update stats
             UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
@@ -1159,7 +1159,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
             for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount);
                  sample++)
             {
-                RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
                 // center pattern does a single coverage/depth/stencil test, standard pattern tests
                 // all samples
                 uint32_t   coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
@@ -1175,7 +1175,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                     if (!_simd_movemask_ps(depthMask))
                     {
                         // stencil should already have been written in early/lateZ tests
-                        RDTSC_END(BEOutputMerger, 0);
+                        RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
                         continue;
                     }
                 }
@@ -1210,10 +1210,10 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
                                       pStencilSample,
                                       PixelRateZTest.stencilPassMask[coverageSampleNum]);
                 }
-                RDTSC_END(BEOutputMerger, 0);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
             }
         Endtile:
-            RDTSC_BEGIN(BEEndTile, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
 
             for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
             {
@@ -1242,7 +1242,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
             pStencilBuffer +=
                 (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
 
-            RDTSC_END(BEEndTile, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
 
             psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
             psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@@ -1252,7 +1252,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
-    RDTSC_END(BEPixelRateBackend, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BEPixelRateBackend, 0);
 }
 
 template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X,
index 9b0b80f..04e5e3d 100644 (file)
@@ -45,8 +45,8 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                        SWR_TRIANGLE_DESC&   work,
                        RenderOutputBuffers& renderBuffers)
 {
-    RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
-    RDTSC_BEGIN(BESetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESampleRateBackend, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
 
     void* pWorkerData      = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
     const API_STATE& state = GetApiState(pDC);
@@ -65,7 +65,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
-    RDTSC_END(BESetup, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);
 
     psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
     psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@@ -95,14 +95,14 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                     pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
             }
 
-            RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
 
             CalcPixelBarycentrics(coeffs, psContext);
 
             CalcCentroid<T, false>(
                 &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
 
-            RDTSC_END(BEBarycentric, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
 
             for (uint32_t sample = 0; sample < T::MultisampleT::numSamples; sample++)
             {
@@ -128,7 +128,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                         coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
                     }
 
-                    RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
 
                     // calculate per sample positions
                     psContext.vX.sample = _simd_add_ps(psContext.vX.UL, samplePos.vX(sample));
@@ -144,7 +144,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                                             psContext.vJ.sample);
                     psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
 
-                    RDTSC_END(BEBarycentric, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);
 
                     // interpolate user clip distance if available
                     if (state.backendState.clipDistanceMask)
@@ -162,7 +162,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                     // Early-Z?
                     if (T::bCanEarlyZ)
                     {
-                        RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
+                        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state,
                                                          work.triFlags.frontFacing,
                                                          work.triFlags.viewportIndex,
@@ -174,7 +174,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                         AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
                                                                  _simd_movemask_ps(stencilPassMask),
                                                                  _simd_movemask_ps(vCoverageMask)));
-                        RDTSC_END(BEEarlyDepthTest, 0);
+                        RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
 
                         // early-exit if no samples passed depth or earlyZ is forced on.
                         if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@@ -201,9 +201,9 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                     psContext.activeMask  = _simd_castps_si(vCoverageMask);
 
                     // execute pixel shader
-                    RDTSC_BEGIN(BEPixelShader, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
                     state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
-                    RDTSC_END(BEPixelShader, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
 
                     // update stats
                     UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
@@ -214,7 +214,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                     // late-Z
                     if (!T::bCanEarlyZ)
                     {
-                        RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
+                        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
                         depthPassMask = DepthStencilTest(&state,
                                                          work.triFlags.frontFacing,
                                                          work.triFlags.viewportIndex,
@@ -226,7 +226,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                         AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
                                                                 _simd_movemask_ps(stencilPassMask),
                                                                 _simd_movemask_ps(vCoverageMask)));
-                        RDTSC_END(BELateDepthTest, 0);
+                        RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
 
                         if (!_simd_movemask_ps(depthPassMask))
                         {
@@ -251,7 +251,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                     UPDATE_STAT_BE(DepthPassCount, statCount);
 
                     // output merger
-                    RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
 
                     OutputMerger8x2(pDC,
                                     psContext,
@@ -278,7 +278,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
                                           pStencilSample,
                                           stencilPassMask);
                     }
-                    RDTSC_END(BEOutputMerger, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
                 }
                 work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
             }
@@ -286,7 +286,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
         Endtile:
             ATTR_UNUSED;
 
-            RDTSC_BEGIN(BEEndTile, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
 
             if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
             {
@@ -309,7 +309,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
             pStencilBuffer +=
                 (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
 
-            RDTSC_END(BEEndTile, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
 
             psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
             psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@@ -319,7 +319,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
-    RDTSC_END(BESampleRateBackend, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESampleRateBackend, 0);
 }
 
 // Recursive template used to auto-nest conditionals.  Converts dynamic enum function
index 46aabcd..2b86826 100644 (file)
@@ -45,8 +45,8 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                          SWR_TRIANGLE_DESC&   work,
                          RenderOutputBuffers& renderBuffers)
 {
-    RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
-    RDTSC_BEGIN(BESetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESingleSampleBackend, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
 
     void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
 
@@ -66,7 +66,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                        state.colorHottileEnable,
                        renderBuffers);
 
-    RDTSC_END(BESetup, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
 
     psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
     psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
@@ -114,7 +114,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                         pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
                 }
 
-                RDTSC_BEGIN(BEBarycentric, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
 
                 CalcPixelBarycentrics(coeffs, psContext);
 
@@ -126,7 +126,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                     coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
                 psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
 
-                RDTSC_END(BEBarycentric, 1);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 1);
 
                 // interpolate user clip distance if available
                 if (state.backendState.clipDistanceMask)
@@ -144,7 +144,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                 // Early-Z?
                 if (T::bCanEarlyZ)
                 {
-                    RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state,
                                                      work.triFlags.frontFacing,
                                                      work.triFlags.viewportIndex,
@@ -156,7 +156,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                     AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
                                                                _simd_movemask_ps(stencilPassMask),
                                                                _simd_movemask_ps(vCoverageMask)));
-                    RDTSC_END(BEEarlyDepthTest, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
 
                     // early-exit if no pixels passed depth or earlyZ is forced on
                     if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
@@ -182,9 +182,9 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                 psContext.activeMask  = _simd_castps_si(vCoverageMask);
 
                 // execute pixel shader
-                RDTSC_BEGIN(BEPixelShader, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
                 state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
-                RDTSC_END(BEPixelShader, 0);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
 
                 // update stats
                 UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
@@ -195,7 +195,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                 // late-Z
                 if (!T::bCanEarlyZ)
                 {
-                    RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
                     depthPassMask = DepthStencilTest(&state,
                                                      work.triFlags.frontFacing,
                                                      work.triFlags.viewportIndex,
@@ -207,7 +207,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                     AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
                                                               _simd_movemask_ps(stencilPassMask),
                                                               _simd_movemask_ps(vCoverageMask)));
-                    RDTSC_END(BELateDepthTest, 0);
+                    RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
 
                     if (!_simd_movemask_ps(depthPassMask))
                     {
@@ -236,7 +236,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                 UPDATE_STAT_BE(DepthPassCount, statCount);
 
                 // output merger
-                RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
 
                 OutputMerger8x2(pDC,
                                 psContext,
@@ -263,11 +263,11 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
                                       pStencilBuffer,
                                       stencilPassMask);
                 }
-                RDTSC_END(BEOutputMerger, 0);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
             }
 
         Endtile:
-            RDTSC_BEGIN(BEEndTile, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
 
             work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
             if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
@@ -291,7 +291,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
             pStencilBuffer +=
                 (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
 
-            RDTSC_END(BEEndTile, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
 
             psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
             psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
@@ -301,7 +301,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
     }
 
-    RDTSC_END(BESingleSampleBackend, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
 }
 
 // Recursive template used to auto-nest conditionals.  Converts dynamic enum function
index 6dc703c..dbc387e 100644 (file)
@@ -373,7 +373,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
     Integer<SIMD_T> vNegB1 = SIMD_T::mullo_epi32(vBi[1], SIMD_T::set1_epi32(-1));
     Integer<SIMD_T> vNegB2 = SIMD_T::mullo_epi32(vBi[2], SIMD_T::set1_epi32(-1));
 
-    RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0);
+    RDTSC_EVENT(pDC->pContext->pBucketMgr,
+                FEEarlyRastEnter,
+                _mm_popcnt_u32(oneTileMask & triMask),
+                0);
 
     Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl();
     Integer<SIMD_T> vCwTris     = SIMD_T::set1_epi32(cwTrisMask);
@@ -639,7 +642,10 @@ uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
 
     if (triMask ^ oldTriMask)
     {
-        RDTSC_EVENT(FEEarlyRastExit, _mm_popcnt_u32(triMask & oneTileMask), 0);
+        RDTSC_EVENT(pDC->pContext->pBucketMgr,
+                    FEEarlyRastExit,
+                    _mm_popcnt_u32(triMask & oneTileMask),
+                    0);
     }
     return triMask;
 }
@@ -668,7 +674,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
 {
     const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
 
-    RDTSC_BEGIN(FEBinTriangles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinTriangles, pDC->drawId);
 
     const API_STATE&          state     = GetApiState(pDC);
     const SWR_RASTSTATE&      rastState = state.rastState;
@@ -806,7 +812,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
 
     if (origTriMask ^ triMask)
     {
-        RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
+        RDTSC_EVENT(pDC->pContext->pBucketMgr,
+                    FECullZeroAreaAndBackface,
+                    _mm_popcnt_u32(origTriMask ^ triMask),
+                    0);
     }
 
     AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
@@ -917,7 +926,10 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
 
         if (origTriMask ^ triMask)
         {
-            RDTSC_EVENT(FECullBetweenCenters, _mm_popcnt_u32(origTriMask ^ triMask), 0);
+            RDTSC_EVENT(pDC->pContext->pBucketMgr,
+                        FECullBetweenCenters,
+                        _mm_popcnt_u32(origTriMask ^ triMask),
+                        0);
         }
     }
 
@@ -1017,7 +1029,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
 
             if (!triMask)
             {
-                RDTSC_END(FEBinTriangles, 1);
+                RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
                 return;
             }
         }
@@ -1029,7 +1041,7 @@ endBinTriangles:
 
     if (!triMask)
     {
-        RDTSC_END(FEBinTriangles, 1);
+        RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
         return;
     }
 
@@ -1065,7 +1077,7 @@ endBinTriangles:
         BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
             pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
 
-        RDTSC_END(FEBinTriangles, 1);
+        RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
         return;
     }
     else if (rastState.fillMode == SWR_FILLMODE_POINT)
@@ -1078,7 +1090,7 @@ endBinTriangles:
         BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
             pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
 
-        RDTSC_END(FEBinTriangles, 1);
+        RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
         return;
     }
 
@@ -1194,7 +1206,7 @@ endBinTriangles:
         triMask &= ~(1 << triIndex);
     }
 
-    RDTSC_END(FEBinTriangles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEBinTriangles, 1);
 }
 
 template <typename CT>
@@ -1274,7 +1286,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT*          pDC,
                             Integer<SIMD_T> const& viewportIdx,
                             Integer<SIMD_T> const& rtIdx)
 {
-    RDTSC_BEGIN(FEBinPoints, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinPoints, pDC->drawId);
 
     Vec4<SIMD_T>& primVerts = prim[0];
 
@@ -1572,7 +1584,7 @@ void BinPostSetupPointsImpl(DRAW_CONTEXT*          pDC,
         }
     }
 
-    RDTSC_END(FEBinPoints, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEBinPoints, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1674,7 +1686,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT*          pDC,
 {
     const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
 
-    RDTSC_BEGIN(FEBinLines, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEBinLines, pDC->drawId);
 
     const API_STATE&     state     = GetApiState(pDC);
     const SWR_RASTSTATE& rastState = state.rastState;
@@ -1867,7 +1879,7 @@ void BinPostSetupLinesImpl(DRAW_CONTEXT*          pDC,
 
 endBinLines:
 
-    RDTSC_END(FEBinLines, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEBinLines, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
index 87be5bc..c399caf 100644 (file)
@@ -185,10 +185,10 @@ void ClipRectangles(DRAW_CONTEXT*      pDC,
                     simdscalari const& viewportIdx,
                     simdscalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
     Clipper<SIMD256, 3> clipper(workerId, pDC);
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-    RDTSC_END(FEClipRectangles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
 }
 
 void ClipTriangles(DRAW_CONTEXT*      pDC,
@@ -200,10 +200,10 @@ void ClipTriangles(DRAW_CONTEXT*      pDC,
                    simdscalari const& viewportIdx,
                    simdscalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
     Clipper<SIMD256, 3> clipper(workerId, pDC);
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-    RDTSC_END(FEClipTriangles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
 }
 
 void ClipLines(DRAW_CONTEXT*      pDC,
@@ -215,10 +215,10 @@ void ClipLines(DRAW_CONTEXT*      pDC,
                simdscalari const& viewportIdx,
                simdscalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipLines, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
     Clipper<SIMD256, 2> clipper(workerId, pDC);
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-    RDTSC_END(FEClipLines, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
 }
 
 void ClipPoints(DRAW_CONTEXT*      pDC,
@@ -230,10 +230,10 @@ void ClipPoints(DRAW_CONTEXT*      pDC,
                 simdscalari const& viewportIdx,
                 simdscalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipPoints, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
     Clipper<SIMD256, 1> clipper(workerId, pDC);
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
-    RDTSC_END(FEClipPoints, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -246,7 +246,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT*        pDC,
                                     simd16scalari const& viewportIdx,
                                     simd16scalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipRectangles, pDC->drawId);
 
     enum
     {
@@ -258,7 +258,7 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT*        pDC,
     pa.useAlternateOffset = false;
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
-    RDTSC_END(FEClipRectangles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipRectangles, 1);
 }
 
 void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT*        pDC,
@@ -270,7 +270,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT*        pDC,
                                    simd16scalari const& viewportIdx,
                                    simd16scalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipTriangles, pDC->drawId);
 
     enum
     {
@@ -282,7 +282,7 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT*        pDC,
     pa.useAlternateOffset = false;
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
-    RDTSC_END(FEClipTriangles, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipTriangles, 1);
 }
 
 void SIMDCALL ClipLines_simd16(DRAW_CONTEXT*        pDC,
@@ -294,7 +294,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT*        pDC,
                                simd16scalari const& viewportIdx,
                                simd16scalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipLines, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipLines, pDC->drawId);
 
     enum
     {
@@ -306,7 +306,7 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT*        pDC,
     pa.useAlternateOffset = false;
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
-    RDTSC_END(FEClipLines, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipLines, 1);
 }
 
 void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT*        pDC,
@@ -318,7 +318,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT*        pDC,
                                 simd16scalari const& viewportIdx,
                                 simd16scalari const& rtIdx)
 {
-    RDTSC_BEGIN(FEClipPoints, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEClipPoints, pDC->drawId);
 
     enum
     {
@@ -330,7 +330,7 @@ void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT*        pDC,
     pa.useAlternateOffset = false;
     clipper.ExecuteStage(pa, prims, primMask, primId, viewportIdx, rtIdx);
 
-    RDTSC_END(FEClipPoints, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEClipPoints, 1);
 }
 
 #endif
index 33c1653..1965274 100644 (file)
@@ -781,7 +781,7 @@ public:
 
         if (clipMask)
         {
-            RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId);
+            RDTSC_BEGIN(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, pa.pDC->drawId);
             // we have to clip tris, execute the clipper, which will also
             // call the binner
             ClipSimd(prim,
@@ -791,7 +791,7 @@ public:
                      primId,
                      viewportIdx,
                      rtIdx);
-            RDTSC_END(FEGuardbandClip, 1);
+            RDTSC_END(pa.pDC->pContext->pBucketMgr, FEGuardbandClip, 1);
         }
         else if (validMask)
         {
index 8849e60..d17baea 100644 (file)
@@ -40,6 +40,7 @@
 #include "core/fifo.hpp"
 #include "core/knobs.h"
 #include "common/intrin.h"
+#include "common/rdtsc_buckets.h"
 #include "core/threads.h"
 #include "ringbuffer.h"
 #include "archrast/archrast.h"
@@ -523,14 +524,14 @@ struct SWR_CONTEXT
     HotTileMgr* pHotTileMgr;
 
     // Callback functions, passed in at create context time
-    PFN_LOAD_TILE                   pfnLoadTile;
-    PFN_STORE_TILE                  pfnStoreTile;
-    PFN_TRANSLATE_GFXPTR_FOR_READ   pfnTranslateGfxptrForRead;
-    PFN_TRANSLATE_GFXPTR_FOR_WRITE  pfnTranslateGfxptrForWrite;
-    PFN_MAKE_GFXPTR                 pfnMakeGfxPtr;
-    PFN_UPDATE_SO_WRITE_OFFSET      pfnUpdateSoWriteOffset;
-    PFN_UPDATE_STATS                pfnUpdateStats;
-    PFN_UPDATE_STATS_FE             pfnUpdateStatsFE;
+    PFN_LOAD_TILE                  pfnLoadTile;
+    PFN_STORE_TILE                 pfnStoreTile;
+    PFN_TRANSLATE_GFXPTR_FOR_READ  pfnTranslateGfxptrForRead;
+    PFN_TRANSLATE_GFXPTR_FOR_WRITE pfnTranslateGfxptrForWrite;
+    PFN_MAKE_GFXPTR                pfnMakeGfxPtr;
+    PFN_UPDATE_SO_WRITE_OFFSET     pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS               pfnUpdateStats;
+    PFN_UPDATE_STATS_FE            pfnUpdateStatsFE;
 
 
     // Global Stats
@@ -550,6 +551,8 @@ struct SWR_CONTEXT
 
     // ArchRast thread contexts.
     HANDLE* pArContext;
+
+    BucketManager *pBucketMgr;
 };
 
 #define UPDATE_STAT_BE(name, count)                   \
@@ -568,11 +571,11 @@ struct SWR_CONTEXT
 #define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
 
 #ifdef KNOB_ENABLE_RDTSC
-#define RDTSC_BEGIN(type, drawid) RDTSC_START(type)
-#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
+#define RDTSC_BEGIN(pBucketMgr, type, drawid) RDTSC_START(pBucketMgr, type)
+#define RDTSC_END(pBucketMgr, type, count) RDTSC_STOP(pBucketMgr, type, count, 0)
 #else
-#define RDTSC_BEGIN(type, count)
-#define RDTSC_END(type, count)
+#define RDTSC_BEGIN(pBucketMgr, type, drawid)
+#define RDTSC_END(pBucketMgr, type, count)
 #endif
 
 #ifdef KNOB_ENABLE_AR
index 85b2e8d..5eda4d7 100644 (file)
@@ -135,7 +135,7 @@ void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, v
 /// @todo This should go away when we switch this to use compute threading.
 void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
 {
-    RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, FEProcessStoreTiles, pDC->drawId);
     MacroTileMgr*     pTileMgr = pDC->pTileMgr;
     STORE_TILES_DESC* pDesc    = (STORE_TILES_DESC*)pUserData;
 
@@ -160,7 +160,7 @@ void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t worker
         }
     }
 
-    RDTSC_END(FEProcessStoreTiles, 0);
+    RDTSC_END(pContext->pBucketMgr, FEProcessStoreTiles, 0);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -175,7 +175,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT*  pContext,
                                    uint32_t      workerId,
                                    void*         pUserData)
 {
-    RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, FEProcessInvalidateTiles, pDC->drawId);
     DISCARD_INVALIDATE_TILES_DESC* pDesc    = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
     MacroTileMgr*                  pTileMgr = pDC->pTileMgr;
 
@@ -214,7 +214,7 @@ void ProcessDiscardInvalidateTiles(SWR_CONTEXT*  pContext,
         }
     }
 
-    RDTSC_END(FEProcessInvalidateTiles, 0);
+    RDTSC_END(pContext->pBucketMgr, FEProcessInvalidateTiles, 0);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -518,7 +518,7 @@ static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
 static void StreamOut(
     DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex)
 {
-    RDTSC_BEGIN(FEStreamout, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEStreamout, pDC->drawId);
 
     const API_STATE&           state   = GetApiState(pDC);
     const SWR_STREAMOUT_STATE& soState = state.soState;
@@ -598,7 +598,7 @@ static void StreamOut(
     UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded);
     UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten);
 
-    RDTSC_END(FEStreamout, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEStreamout, 1);
 }
 
 #if USE_SIMD16_FRONTEND
@@ -834,7 +834,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
 #endif
                                 simdscalari const& primID)
 {
-    RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEGeometryShader, pDC->drawId);
 
     void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
 
@@ -1178,7 +1178,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
     UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
     UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
     AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims));
-    RDTSC_END(FEGeometryShader, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEGeometryShader, 1);
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1372,9 +1372,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
     hsContext.mask = GenerateMask(numPrims);
 
     // Run the HS
-    RDTSC_BEGIN(FEHullShader, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEHullShader, pDC->drawId);
     state.pfnHsFunc(GetPrivateState(pDC), pWorkerData, &hsContext);
-    RDTSC_END(FEHullShader, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, FEHullShader, 0);
 
     UPDATE_STAT_FE(HsInvocations, numPrims);
     AR_EVENT(HSStats((HANDLE)&hsContext.stats));
@@ -1385,10 +1385,10 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
     {
         // Run Tessellator
         SWR_TS_TESSELLATED_DATA tsData = {0};
-        RDTSC_BEGIN(FETessellation, pDC->drawId);
+        RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
         TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
         AR_EVENT(TessPrimCount(1));
-        RDTSC_END(FETessellation, 0);
+        RDTSC_END(pDC->pContext->pBucketMgr, FETessellation, 0);
 
         if (tsData.NumPrimitives == 0)
         {
@@ -1441,9 +1441,9 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
         {
             dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
 
-            RDTSC_BEGIN(FEDomainShader, pDC->drawId);
+            RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEDomainShader, pDC->drawId);
             state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
-            RDTSC_END(FEDomainShader, 0);
+            RDTSC_END(pDC->pContext->pBucketMgr, FEDomainShader, 0);
 
             AR_EVENT(DSStats((HANDLE)&dsContext.stats));
 
@@ -1524,14 +1524,14 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
 #else
                     simdvector prim[3]; // Only deal with triangles, lines, or points
 #endif
-                    RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
+                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
                     bool assemble =
 #if USE_SIMD16_FRONTEND
                         tessPa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
 #else
                         tessPa.Assemble(VERTEX_POSITION_SLOT, prim);
 #endif
-                    RDTSC_END(FEPAAssemble, 1);
+                    RDTSC_END(pDC->pContext->pBucketMgr, FEPAAssemble, 1);
                     SWR_ASSERT(assemble);
 
                     SWR_ASSERT(pfnClipFunc);
@@ -1663,7 +1663,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
     }
 #endif
 
-    RDTSC_BEGIN(FEProcessDraw, pDC->drawId);
+    RDTSC_BEGIN(pContext->pBucketMgr, FEProcessDraw, pDC->drawId);
 
     void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
 
@@ -1895,7 +1895,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
 #endif
                 }
                 // 1. Execute FS/VS for a single SIMD.
-                RDTSC_BEGIN(FEFetchShader, pDC->drawId);
+                RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
 #if USE_SIMD16_SHADERS
                 state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin);
 #else
@@ -1906,7 +1906,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                     state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
                 }
 #endif
-                RDTSC_END(FEFetchShader, 0);
+                RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
 
                 // forward fetch generated vertex IDs to the vertex shader
 #if USE_SIMD16_SHADERS
@@ -1950,7 +1950,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                 if (!KNOB_TOSS_FETCH)
 #endif
                 {
-                    RDTSC_BEGIN(FEVertexShader, pDC->drawId);
+                    RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
 #if USE_SIMD16_VS
                     state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
                     AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
@@ -1964,7 +1964,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                         AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats));
                     }
 #endif
-                    RDTSC_END(FEVertexShader, 0);
+                    RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
 
                     UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
                 }
@@ -1975,9 +1975,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
             {
                 simd16vector prim_simd16[MAX_NUM_VERTS_PER_PRIM];
 
-                RDTSC_START(FEPAAssemble);
+                RDTSC_BEGIN(pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
                 bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim_simd16);
-                RDTSC_STOP(FEPAAssemble, 1, 0);
+                RDTSC_END(pContext->pBucketMgr, FEPAAssemble, 1);
 
 #if KNOB_ENABLE_TOSS_POINTS
                 if (!KNOB_TOSS_FETCH)
@@ -2190,9 +2190,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
             if (i < endVertex)
             {
                 // 1. Execute FS/VS for a single SIMD.
-                RDTSC_BEGIN(FEFetchShader, pDC->drawId);
+                RDTSC_BEGIN(pContext->pBucketMgr, FEFetchShader, pDC->drawId);
                 state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
-                RDTSC_END(FEFetchShader, 0);
+                RDTSC_END(pContext->pBucketMgr, FEFetchShader, 0);
 
                 // forward fetch generated vertex IDs to the vertex shader
                 vsContext.VertexID = fetchInfo.VertexID;
@@ -2212,9 +2212,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                 if (!KNOB_TOSS_FETCH)
 #endif
                 {
-                    RDTSC_BEGIN(FEVertexShader, pDC->drawId);
+                    RDTSC_BEGIN(pContext->pBucketMgr, FEVertexShader, pDC->drawId);
                     state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext);
-                    RDTSC_END(FEVertexShader, 0);
+                    RDTSC_END(pContext->pBucketMgr, FEVertexShader, 0);
 
                     UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
                     AR_EVENT(VSStats((HANDLE)&vsContext.stats));
@@ -2226,9 +2226,9 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
             {
                 simdvector prim[MAX_NUM_VERTS_PER_PRIM];
                 // PaAssemble returns false if there is not enough verts to assemble.
-                RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
+                RDTSC_BEGIN(pContext->pBucketMgr, FEPAAssemble, pDC->drawId);
                 bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim);
-                RDTSC_END(FEPAAssemble, 1);
+                RDTSC_END(pContext->pBucketMgr, FEPAAssemble, 1);
 
 #if KNOB_ENABLE_TOSS_POINTS
                 if (!KNOB_TOSS_FETCH)
@@ -2339,7 +2339,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
 
 #endif
 
-    RDTSC_END(FEProcessDraw, numPrims * work.numInstances);
+    RDTSC_END(pContext->pBucketMgr, FEProcessDraw, numPrims * work.numInstances);
 }
 
 struct FEDrawChooser
index a392035..44c486c 100644 (file)
@@ -53,7 +53,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi
 #endif
 
     // bloat line to two tris and call the triangle rasterizer twice
-    RDTSC_BEGIN(BERasterizeLine, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeLine, pDC->drawId);
 
     const API_STATE&     state     = GetApiState(pDC);
     const SWR_RASTSTATE& rastState = state.rastState;
@@ -245,7 +245,7 @@ void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, voi
         pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
     }
 
-    RDTSC_BEGIN(BERasterizeLine, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeLine, 1);
 }
 
 void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
@@ -308,9 +308,9 @@ void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTi
                       renderBuffers,
                       triDesc.triFlags.renderTargetArrayIndex);
 
-    RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
     backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
-    RDTSC_END(BEPixelBackend, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
 }
 
 void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
index 20206ea..4a0fd09 100644 (file)
@@ -842,10 +842,10 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
         }
 
         // not trivial accept or reject, must rasterize full tile
-        RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
+        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
         innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
             pDC, startQuadEdgesAdj, pRastEdges);
-        RDTSC_END(BERasterizePartial, 0);
+        RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
     }
 };
 
@@ -927,8 +927,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
         return;
     }
 #endif
-    RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId);
-    RDTSC_BEGIN(BETriangleSetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizeTriangle, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BETriangleSetup, pDC->drawId);
 
     const API_STATE&     state        = GetApiState(pDC);
     const SWR_RASTSTATE& rastState    = state.rastState;
@@ -1103,7 +1103,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 &&
                intersect.ymax >= 0);
 
-    RDTSC_END(BETriangleSetup, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BETriangleSetup, 0);
 
     // update triangle desc
     uint32_t minTileX  = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
@@ -1115,12 +1115,12 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
 
     if (numTilesX == 0 || numTilesY == 0)
     {
-        RDTSC_EVENT(BEEmptyTriangle, 1, 0);
-        RDTSC_END(BERasterizeTriangle, 1);
+        RDTSC_EVENT(pDC->pContext->pBucketMgr, BEEmptyTriangle, 1, 0);
+        RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
         return;
     }
 
-    RDTSC_BEGIN(BEStepSetup, pDC->drawId);
+    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStepSetup, pDC->drawId);
 
     // Step to pixel center of top-left pixel of the triangle bbox
     // Align intersect bbox (top/left) to raster tile's (top/left).
@@ -1232,7 +1232,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
         }
     }
 
-    RDTSC_END(BEStepSetup, 0);
+    RDTSC_END(pDC->pContext->pBucketMgr, BEStepSetup, 0);
 
     uint32_t tY   = minTileY;
     uint32_t tX   = minTileX;
@@ -1297,7 +1297,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                         {
                             triDesc.innerCoverageMask = 0xffffffffffffffffULL;
                         }
-                        RDTSC_EVENT(BETrivialAccept, 1, 0);
+                        RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialAccept, 1, 0);
                     }
                     else
                     {
@@ -1339,11 +1339,11 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                         }
 
                         // not trivial accept or reject, must rasterize full tile
-                        RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
+                        RDTSC_BEGIN(pDC->pContext->pBucketMgr, BERasterizePartial, pDC->drawId);
                         triDesc.coverageMask[sampleNum] =
                             rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
                                 pDC, startQuadEdges, rastEdges);
-                        RDTSC_END(BERasterizePartial, 0);
+                        RDTSC_END(pDC->pContext->pBucketMgr, BERasterizePartial, 0);
 
                         triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
 
@@ -1362,7 +1362,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                     {
                         triDesc.coverageMask[sampleNum] = 0;
                     }
-                    RDTSC_EVENT(BETrivialReject, 1, 0);
+                    RDTSC_EVENT(pDC->pContext->pBucketMgr, BETrivialReject, 1, 0);
                 }
             }
 
@@ -1389,14 +1389,14 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                 // Track rasterized subspans
                 AR_EVENT(RasterTileCount(pDC->drawId, 1));
 
-                RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
+                RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelBackend, pDC->drawId);
                 backendFuncs.pfnBackend(pDC,
                                         workerId,
                                         tileX << KNOB_TILE_X_DIM_SHIFT,
                                         tileY << KNOB_TILE_Y_DIM_SHIFT,
                                         triDesc,
                                         renderBuffers);
-                RDTSC_END(BEPixelBackend, 0);
+                RDTSC_END(pDC->pContext->pBucketMgr, BEPixelBackend, 0);
             }
 
             // step to the next tile in X
@@ -1417,7 +1417,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
         StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
     }
 
-    RDTSC_END(BERasterizeTriangle, 1);
+    RDTSC_END(pDC->pContext->pBucketMgr, BERasterizeTriangle, 1);
 }
 
 // Get pointers to hot tile memory for color RT, depth, stencil
index e858a7d..6329b2e 100644 (file)
@@ -92,9 +92,3 @@ BUCKET_DESC gCoreBuckets[] = {
 static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
               "RDTSC Bucket enum and description table size mismatched.");
 
-/// @todo bucketmanager and mapping should probably be a part of the SWR context
-std::vector<uint32_t> gBucketMap;
-BucketManager         gBucketMgr;
-
-uint32_t gCurrentFrame       = 0;
-bool     gBucketsInitialized = false;
index dc20e5b..0228275 100644 (file)
@@ -100,90 +100,86 @@ enum CORE_BUCKETS
     NumBuckets
 };
 
-void rdtscReset();
-void rdtscInit(int threadId);
-void rdtscStart(uint32_t bucketId);
-void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId);
-void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2);
-void rdtscEndFrame();
+void rdtscReset(BucketManager* pBucketMgr);
+void rdtscInit(BucketManager* pBucketMgr, int threadId);
+void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId);
+void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId);
+void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2);
+void rdtscEndFrame(BucketManager* pBucketMgr);
 
 #ifdef KNOB_ENABLE_RDTSC
-#define RDTSC_RESET() rdtscReset()
-#define RDTSC_INIT(threadId) rdtscInit(threadId)
-#define RDTSC_START(bucket) rdtscStart(bucket)
-#define RDTSC_STOP(bucket, count, draw) rdtscStop(bucket, count, draw)
-#define RDTSC_EVENT(bucket, count1, count2) rdtscEvent(bucket, count1, count2)
-#define RDTSC_ENDFRAME() rdtscEndFrame()
+#define RDTSC_RESET(pBucketMgr) rdtscReset(pBucketMgr)
+#define RDTSC_INIT(pBucketMgr, threadId) rdtscInit(pBucketMgr,threadId)
+#define RDTSC_START(pBucketMgr, bucket) rdtscStart(pBucketMgr, bucket)
+#define RDTSC_STOP(pBucketMgr, bucket, count, draw) rdtscStop(pBucketMgr, bucket, count, draw)
+#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2) rdtscEvent(pBucketMgr, bucket, count1, count2)
+#define RDTSC_ENDFRAME(pBucketMgr) rdtscEndFrame(pBucketMgr)
 #else
-#define RDTSC_RESET()
-#define RDTSC_INIT(threadId)
-#define RDTSC_START(bucket)
-#define RDTSC_STOP(bucket, count, draw)
-#define RDTSC_EVENT(bucket, count1, count2)
-#define RDTSC_ENDFRAME()
+#define RDTSC_RESET(pBucketMgr)
+#define RDTSC_INIT(pBucketMgr, threadId)
+#define RDTSC_START(pBucketMgr, bucket)
+#define RDTSC_STOP(pBucketMgr, bucket, count, draw)
+#define RDTSC_EVENT(pBucketMgr, bucket, count1, count2)
+#define RDTSC_ENDFRAME(pBucketMgr)
 #endif
 
-extern std::vector<uint32_t> gBucketMap;
-extern BucketManager         gBucketMgr;
 extern BUCKET_DESC           gCoreBuckets[];
-extern uint32_t              gCurrentFrame;
-extern bool                  gBucketsInitialized;
 
-INLINE void rdtscReset()
+INLINE void rdtscReset(BucketManager *pBucketMgr)
 {
-    gCurrentFrame = 0;
-    gBucketMgr.ClearThreads();
+    pBucketMgr->mCurrentFrame = 0;
+    pBucketMgr->ClearThreads();
 }
 
-INLINE void rdtscInit(int threadId)
+INLINE void rdtscInit(BucketManager* pBucketMgr, int threadId)
 {
     // register all the buckets once
-    if (!gBucketsInitialized && (threadId == 0))
+    if (!pBucketMgr->mBucketsInitialized && (threadId == 0))
     {
-        gBucketMap.resize(NumBuckets);
+        pBucketMgr->mBucketMap.resize(NumBuckets);
         for (uint32_t i = 0; i < NumBuckets; ++i)
         {
-            gBucketMap[i] = gBucketMgr.RegisterBucket(gCoreBuckets[i]);
+            pBucketMgr->mBucketMap[i] = pBucketMgr->RegisterBucket(gCoreBuckets[i]);
         }
-        gBucketsInitialized = true;
+        pBucketMgr->mBucketsInitialized = true;
     }
 
     std::string name = threadId == 0 ? "API" : "WORKER";
-    gBucketMgr.RegisterThread(name);
+    pBucketMgr->RegisterThread(name);
 }
 
-INLINE void rdtscStart(uint32_t bucketId)
+INLINE void rdtscStart(BucketManager* pBucketMgr, uint32_t bucketId)
 {
-    uint32_t id = gBucketMap[bucketId];
-    gBucketMgr.StartBucket(id);
+    uint32_t id = pBucketMgr->mBucketMap[bucketId];
+    pBucketMgr->StartBucket(id);
 }
 
-INLINE void rdtscStop(uint32_t bucketId, uint32_t count, uint64_t drawId)
+INLINE void rdtscStop(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count, uint64_t drawId)
 {
-    uint32_t id = gBucketMap[bucketId];
-    gBucketMgr.StopBucket(id);
+    uint32_t id = pBucketMgr->mBucketMap[bucketId];
+    pBucketMgr->StopBucket(id);
 }
 
-INLINE void rdtscEvent(uint32_t bucketId, uint32_t count1, uint32_t count2)
+INLINE void rdtscEvent(BucketManager* pBucketMgr, uint32_t bucketId, uint32_t count1, uint32_t count2)
 {
-    uint32_t id = gBucketMap[bucketId];
-    gBucketMgr.AddEvent(id, count1);
+    uint32_t id = pBucketMgr->mBucketMap[bucketId];
+    pBucketMgr->AddEvent(id, count1);
 }
 
-INLINE void rdtscEndFrame()
+INLINE void rdtscEndFrame(BucketManager* pBucketMgr)
 {
-    gCurrentFrame++;
+    pBucketMgr->mCurrentFrame++;
 
-    if (gCurrentFrame == KNOB_BUCKETS_START_FRAME &&
+    if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_START_FRAME &&
         KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
     {
-        gBucketMgr.StartCapture();
+        pBucketMgr->StartCapture();
     }
 
-    if (gCurrentFrame == KNOB_BUCKETS_END_FRAME &&
+    if (pBucketMgr->mCurrentFrame == KNOB_BUCKETS_END_FRAME &&
         KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
     {
-        gBucketMgr.StopCapture();
-        gBucketMgr.PrintReport("rdtsc.txt");
+        pBucketMgr->StopCapture();
+        pBucketMgr->PrintReport("rdtsc.txt");
     }
 }
index e85144c..b6734e2 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "common/formats.h"
 #include "common/intrin.h"
+#include "common/rdtsc_buckets.h"
 #include <functional>
 #include <algorithm>
 
@@ -381,6 +382,8 @@ struct SWR_PS_CONTEXT
     uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
 
     SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+
+    BucketManager *pBucketManager; // @llvm_struct - IN: performance buckets.
 };
 
 //////////////////////////////////////////////////////////////////////////
index e30c117..59e37a4 100644 (file)
@@ -609,7 +609,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext,
             {
                 BE_WORK* pWork;
 
-                RDTSC_BEGIN(WorkerFoundWork, pDC->drawId);
+                RDTSC_BEGIN(pContext->pBucketMgr, WorkerFoundWork, pDC->drawId);
 
                 uint32_t numWorkItems = tile->getNumQueued();
                 SWR_ASSERT(numWorkItems);
@@ -630,7 +630,7 @@ bool WorkOnFifoBE(SWR_CONTEXT* pContext,
                     pWork->pfnWork(pDC, workerId, tileID, &pWork->desc);
                     tile->dequeue();
                 }
-                RDTSC_END(WorkerFoundWork, numWorkItems);
+                RDTSC_END(pContext->pBucketMgr, WorkerFoundWork, numWorkItems);
 
                 _ReadWriteBarrier();
 
@@ -868,7 +868,7 @@ DWORD workerThreadMain(LPVOID pData)
         SetCurrentThreadName(threadName);
     }
 
-    RDTSC_INIT(threadId);
+    RDTSC_INIT(pContext->pBucketMgr, threadId);
 
     // Only need offset numa index from base for correct masking
     uint32_t numaNode = pThreadData->numaId - pContext->threadInfo.BASE_NUMA_NODE;
@@ -936,10 +936,10 @@ DWORD workerThreadMain(LPVOID pData)
 
         if (IsBEThread)
         {
-            RDTSC_BEGIN(WorkerWorkOnFifoBE, 0);
+            RDTSC_BEGIN(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
             bShutdown |=
                 WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
-            RDTSC_END(WorkerWorkOnFifoBE, 0);
+            RDTSC_END(pContext->pBucketMgr, WorkerWorkOnFifoBE, 0);
 
             WorkOnCompute(pContext, workerId, curDrawBE);
         }
index 1ea1c4b..13f4e37 100644 (file)
@@ -368,7 +368,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
 
         if (pHotTile->state == HOTTILE_INVALID)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // invalid hottile before draw requires a load from surface before we can draw to it
             pContext->pfnLoadTile(GetPrivateState(pDC),
                                   hWorkerPrivateData,
@@ -379,15 +379,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
                                   pHotTile->renderTargetArrayIndex,
                                   pHotTile->pBuffer);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
         else if (pHotTile->state == HOTTILE_CLEAR)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // Clear the tile.
             ClearColorHotTile(pHotTile);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
         colorHottileEnableMask &= ~(1 << rtSlot);
     }
@@ -399,7 +399,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
             pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
         if (pHotTile->state == HOTTILE_INVALID)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // invalid hottile before draw requires a load from surface before we can draw to it
             pContext->pfnLoadTile(GetPrivateState(pDC),
                                   hWorkerPrivateData,
@@ -410,15 +410,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
                                   pHotTile->renderTargetArrayIndex,
                                   pHotTile->pBuffer);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
         else if (pHotTile->state == HOTTILE_CLEAR)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // Clear the tile.
             ClearDepthHotTile(pHotTile);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
     }
 
@@ -429,7 +429,7 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
             pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
         if (pHotTile->state == HOTTILE_INVALID)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // invalid hottile before draw requires a load from surface before we can draw to it
             pContext->pfnLoadTile(GetPrivateState(pDC),
                                   hWorkerPrivateData,
@@ -440,15 +440,15 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
                                   pHotTile->renderTargetArrayIndex,
                                   pHotTile->pBuffer);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
         else if (pHotTile->state == HOTTILE_CLEAR)
         {
-            RDTSC_BEGIN(BELoadTiles, pDC->drawId);
+            RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
             // Clear the tile.
             ClearStencilHotTile(pHotTile);
             pHotTile->state = HOTTILE_DIRTY;
-            RDTSC_END(BELoadTiles, 0);
+            RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
         }
     }
 }
index b3e67b9..1975e11 100644 (file)
@@ -489,8 +489,8 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
 
    ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;
 
-   SWR_CREATECONTEXT_INFO createInfo;
-   memset(&createInfo, 0, sizeof(createInfo));
+   SWR_CREATECONTEXT_INFO createInfo {0};
+
    createInfo.privateStateSize = sizeof(swr_draw_context);
    createInfo.pfnLoadTile = swr_LoadHotTile;
    createInfo.pfnStoreTile = swr_StoreHotTile;