swr/rast: update SWR rasterizer shader stats
author Alok Hota <alok.hota@intel.com>
Thu, 7 Jun 2018 18:14:48 +0000 (13:14 -0500)
committer Alok Hota <alok.hota@intel.com>
Tue, 5 Feb 2019 17:41:25 +0000 (11:41 -0600)
Primarily refactoring internal stats types

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/backend_impl.h
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/state.h
src/gallium/drivers/swr/rasterizer/jitter/builder.h

index ceb06ae..a454fc1 100644 (file)
@@ -339,40 +339,57 @@ namespace ArchRast
                 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
         }
 
-        struct ShaderStats
+        void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
         {
-            uint32_t numInstExecuted;
-        };
+            pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
+            pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
+            pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
+            pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
+            pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
+            pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
+            pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
+            pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
+            pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
+            pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
+            pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
+            pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
+        }
 
         virtual void Handle(const VSStats& event)
         {
-            mShaderStats[SHADER_VERTEX].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
         }
 
         virtual void Handle(const GSStats& event)
         {
-            mShaderStats[SHADER_GEOMETRY].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
         }
 
         virtual void Handle(const DSStats& event)
         {
-            mShaderStats[SHADER_DOMAIN].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
         }
 
         virtual void Handle(const HSStats& event)
         {
-            mShaderStats[SHADER_HULL].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_HULL], pStats);
         }
 
         virtual void Handle(const PSStats& event)
         {
-            mShaderStats[SHADER_PIXEL].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
             mNeedFlush = true;
         }
 
         virtual void Handle(const CSStats& event)
         {
-            mShaderStats[SHADER_COMPUTE].numInstExecuted += event.data.numInstExecuted;
+            SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
+            UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
             mNeedFlush = true;
         }
 
@@ -382,8 +399,32 @@ namespace ArchRast
             if (mNeedFlush == false)
                 return;
 
-            EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
-            EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
+            EventHandlerFile::Handle(PSInfo(drawId,
+                                            mShaderStats[SHADER_PIXEL].numInstExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleLExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleBExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleCExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
+                                            mShaderStats[SHADER_PIXEL].numGather4Executed,
+                                            mShaderStats[SHADER_PIXEL].numGather4CExecuted,
+                                            mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_PIXEL].numLodExecuted));
+            EventHandlerFile::Handle(CSInfo(drawId,
+                                            mShaderStats[SHADER_COMPUTE].numInstExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numGather4Executed,
+                                            mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_COMPUTE].numLodExecuted));
 
             // singleSample
             EventHandlerFile::Handle(EarlyZSingleSample(
@@ -480,14 +521,58 @@ namespace ArchRast
             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
 
-            EventHandlerFile::Handle(
-                VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
-            EventHandlerFile::Handle(
-                HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
-            EventHandlerFile::Handle(
-                DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
-            EventHandlerFile::Handle(
-                GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+            EventHandlerFile::Handle(VSInfo(event.data.drawId,
+                                            mShaderStats[SHADER_VERTEX].numInstExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleLExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleBExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleCExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
+                                            mShaderStats[SHADER_VERTEX].numGather4Executed,
+                                            mShaderStats[SHADER_VERTEX].numGather4CExecuted,
+                                            mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_VERTEX].numLodExecuted));
+            EventHandlerFile::Handle(HSInfo(event.data.drawId,
+                                            mShaderStats[SHADER_HULL].numInstExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleLExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleBExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleCExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_HULL].numSampleCDExecuted,
+                                            mShaderStats[SHADER_HULL].numGather4Executed,
+                                            mShaderStats[SHADER_HULL].numGather4CExecuted,
+                                            mShaderStats[SHADER_HULL].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_HULL].numLodExecuted));
+            EventHandlerFile::Handle(DSInfo(event.data.drawId,
+                                            mShaderStats[SHADER_DOMAIN].numInstExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numGather4Executed,
+                                            mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_DOMAIN].numLodExecuted));
+            EventHandlerFile::Handle(GSInfo(event.data.drawId,
+                                            mShaderStats[SHADER_GEOMETRY].numInstExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numGather4Executed,
+                                            mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
+                                            mShaderStats[SHADER_GEOMETRY].numLodExecuted));
 
             mShaderStats[SHADER_VERTEX]   = {};
             mShaderStats[SHADER_HULL]     = {};
@@ -544,7 +629,7 @@ namespace ArchRast
         CullStats         mCullStats      = {};
         AlphaStats        mAlphaStats     = {};
 
-        ShaderStats mShaderStats[NUM_SHADER_TYPES];
+        SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
 
     };
 
index 32bd81f..fdf39ee 100644 (file)
@@ -325,34 +325,101 @@ event VSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
 event HSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
 event DSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
 event GSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
+
 };
 
 event PSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
 event CSInfo
 {
     uint32_t drawId;
     uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
\ No newline at end of file
index f5cfb47..b49d4bf 100644 (file)
@@ -168,30 +168,30 @@ event DrawIndexedInstancedEvent
 
 event VSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
 
 event HSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
 
 event DSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
 
 event GSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
 
 event PSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
 
 event CSStats
 {
-    uint32_t numInstExecuted;
+    HANDLE hStats;      // SWR_SHADER_STATS
 };
\ No newline at end of file
index 8f8dbcf..883475c 100644 (file)
@@ -88,7 +88,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC,
                     &csContext);
 
     UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
-    AR_EVENT(CSStats(csContext.stats.numInstExecuted));
+    AR_EVENT(CSStats((HANDLE)&csContext.stats));
 
     RDTSC_END(BEDispatch, 1);
 }
index 1798dad..d556c54 100644 (file)
@@ -1212,7 +1212,7 @@ void BackendPixelRate(DRAW_CONTEXT*        pDC,
 
             // update stats
             UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
-            AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+            AR_EVENT(PSStats((HANDLE)&psContext.stats));
 
             // update active lanes to remove any discarded or oMask'd pixels
             activeLanes = _simd_castsi_ps(_simd_and_si(
index a1a1185..ff09cc6 100644 (file)
@@ -207,7 +207,7 @@ void BackendSampleRate(DRAW_CONTEXT*        pDC,
 
                     // update stats
                     UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
-                    AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+                    AR_EVENT(PSStats((HANDLE)&psContext.stats));
 
                     vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
index 2efb01f..1c065ab 100644 (file)
@@ -188,7 +188,7 @@ void BackendSingleSample(DRAW_CONTEXT*        pDC,
 
                 // update stats
                 UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
-                AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+                AR_EVENT(PSStats((HANDLE)&psContext.stats));
 
                 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
index b0d9f05..b510fea 100644 (file)
@@ -888,7 +888,7 @@ static void GeometryShaderStage(DRAW_CONTEXT* pDC,
 
         // execute the geometry shader
         state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
-        AR_EVENT(GSStats(gsContext.stats.numInstExecuted));
+        AR_EVENT(GSStats((HANDLE)&gsContext.stats));
 
         for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
         {
@@ -1375,7 +1375,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
     RDTSC_END(FEHullShader, 0);
 
     UPDATE_STAT_FE(HsInvocations, numPrims);
-    AR_EVENT(HSStats(hsContext.stats.numInstExecuted));
+    AR_EVENT(HSStats((HANDLE)&hsContext.stats));
 
     const uint32_t* pPrimId = (const uint32_t*)&primID;
 
@@ -1443,7 +1443,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
             state.pfnDsFunc(GetPrivateState(pDC), pWorkerData, &dsContext);
             RDTSC_END(FEDomainShader, 0);
 
-            AR_EVENT(DSStats(dsContext.stats.numInstExecuted));
+            AR_EVENT(DSStats((HANDLE)&dsContext.stats));
 
             dsInvocations += KNOB_SIMD_WIDTH;
         }
@@ -1950,15 +1950,15 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                     RDTSC_BEGIN(FEVertexShader, pDC->drawId);
 #if USE_SIMD16_VS
                     state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
-                    AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
+                    AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
 #else
                     state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
-                    AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
+                    AR_EVENT(VSStats((HANDLE)&vsContext_lo.stats));
 
                     if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
                     {
                         state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
-                        AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
+                        AR_EVENT(VSStats((HANDLE)&vsContext_hi.stats));
                     }
 #endif
                     RDTSC_END(FEVertexShader, 0);
@@ -2214,7 +2214,7 @@ void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, vo
                     RDTSC_END(FEVertexShader, 0);
 
                     UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex));
-                    AR_EVENT(VSStats(vsContext.stats.numInstExecuted));
+                    AR_EVENT(VSStats((HANDLE)&vsContext.stats));
                 }
             }
 
index 0b42a45..bdf4f6e 100644 (file)
@@ -221,7 +221,18 @@ struct SIMDVERTEX_T
 /////////////////////////////////////////////////////////////////////////
 struct SWR_SHADER_STATS
 {
-    uint32_t numInstExecuted; // This is roughly the API instructions executed and not x86.
+    uint32_t numInstExecuted;      // This is roughly the API instructions executed and not x86.
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
 //////////////////////////////////////////////////////////////////////////
index 0ce8d02..d252482 100644 (file)
@@ -55,6 +55,9 @@ namespace SwrJit
         STATS_STORE_TGSM              = 15,
         STATS_DISCARD                 = 16,
         STATS_BARRIER                 = 17,
+
+        // ------------------
+        STATS_TOTAL_COUNTERS
     };
 
     using namespace llvm;