swr/rasterizer: Refactor events collection mechanism
author Jan Zielinski <jan.zielinski@intel.com>
Thu, 1 Aug 2019 12:30:58 +0000 (14:30 +0200)
committer Jan Zielinski <jan.zielinski@intel.com>
Thu, 8 Aug 2019 09:15:07 +0000 (11:15 +0200)
Several improvements and cleanups in events and statistics mechanisms

Reviewed-by: Alok Hota <alok.hota@intel.com>
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
src/gallium/drivers/swr/rasterizer/archrast/events.proto
src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
src/gallium/drivers/swr/rasterizer/core/threads.cpp
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h

index 03df614..c1d3f2d 100644 (file)
@@ -87,81 +87,6 @@ namespace ArchRast
         uint32_t alphaBlendCount = 0;
     };
 
-    struct MemoryStats
-    {
-        struct MemoryTrackerKey
-        {
-            uint64_t address;
-            uint64_t mask;
-        };
-
-        struct MemoryTrackerData
-        {
-            uint32_t accessCountRead;
-            uint32_t accessCountWrite;
-            uint32_t totalSizeRead;
-            uint32_t totalSizeWrite;
-            uint64_t tscMin;
-            uint64_t tscMax;
-        };
-
-        struct AddressRangeComparator
-        {
-            bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const
-            {
-                return (a.address & a.mask) < (b.address & b.mask);
-            }
-        };
-
-        typedef std::map<MemoryTrackerKey, MemoryTrackerData, AddressRangeComparator> MemoryTrackerMap;
-        MemoryTrackerMap trackedMemory = {};
-
-        void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size)
-        {
-            MemoryTrackerKey key;
-            key.address = address;
-            key.mask = addressMask;
-
-            MemoryTrackerMap::iterator i = trackedMemory.lower_bound(key);
-            if (i != trackedMemory.end() && !(trackedMemory.key_comp()(key, i->first)))
-            {
-                // already in map
-                if (isRead)
-                {
-                    i->second.accessCountRead++;
-                    i->second.totalSizeRead += size;
-                }
-                else
-                {
-                    i->second.accessCountWrite++;
-                    i->second.totalSizeWrite += size;
-                }
-                i->second.tscMax = tsc;
-            }
-            else
-            {
-                // new entry
-                MemoryTrackerData data;
-                if (isRead)
-                {
-                    data.accessCountRead = 1;
-                    data.totalSizeRead = size;
-                    data.accessCountWrite = 0;
-                    data.totalSizeWrite = 0;
-                }
-                else
-                {
-                    data.accessCountRead = 0;
-                    data.totalSizeRead = 0;
-                    data.accessCountWrite = 1;
-                    data.totalSizeWrite = size;
-                }
-                data.tscMin = tsc;
-                data.tscMax = tsc;
-                trackedMemory.insert(i, MemoryTrackerMap::value_type(key, data));
-            }
-        }
-    };
 
     //////////////////////////////////////////////////////////////////////////
     /// @brief Event handler that handles API thread events. This is shared
@@ -258,17 +183,6 @@ namespace ArchRast
         EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
         {
             memset(mShaderStats, 0, sizeof(mShaderStats));
-
-            // compute address mask for memory tracking
-            mAddressMask = 0;
-            uint64_t addressRangeBytes = 4096;
-            while (addressRangeBytes > 0)
-            {
-                mAddressMask = (mAddressMask << 1) | 1;
-                addressRangeBytes = addressRangeBytes >> 1;
-            }
-            mMemGranularity = mAddressMask + 1;
-            mAddressMask = ~mAddressMask;
         }
 
         virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
@@ -674,42 +588,6 @@ namespace ArchRast
             mGS      = {};
         }
 
-        virtual void Handle(const MemoryAccessEvent& event)
-        {
-            uint64_t trackAddr = event.data.ptr;
-            uint64_t nextAddr = (trackAddr & mAddressMask);
-            uint32_t sizeTracked = 0;
-
-            while (sizeTracked < event.data.size)
-            {
-                nextAddr += mMemGranularity;
-                uint32_t size = nextAddr - trackAddr;
-                size = std::min(event.data.size, size);
-                mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size);
-                sizeTracked += size;
-                trackAddr = nextAddr;
-            }
-        }
-
-        virtual void Handle(const MemoryStatsEndEvent& event)
-        {
-            MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin();
-            while (i != mMemoryStats.trackedMemory.end())
-            {
-                MemoryStatsEvent mse(event.data.drawId,
-                                     i->first.address & mAddressMask,
-                                     i->second.accessCountRead,
-                                     i->second.accessCountWrite,
-                                     i->second.totalSizeRead,
-                                     i->second.totalSizeWrite,
-                                     i->second.tscMin,
-                                     i->second.tscMax);
-                EventHandlerFile::Handle(mse);
-                i++;
-            }
-            mMemoryStats.trackedMemory.clear();
-        }
-
         virtual void Handle(const GSPrimInfo& event)
         {
             mGS.inputPrimCount += event.data.inputPrimCount;
@@ -756,10 +634,6 @@ namespace ArchRast
 
         SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
 
-        MemoryStats      mMemoryStats     = {};
-        uint64_t         mAddressMask     = 0;
-        uint64_t         mMemGranularity  = 0;
-
     };
 
     static EventManager* FromHandle(HANDLE hThreadContext)
index 8a6093f..a530893 100644 (file)
@@ -37,449 +37,391 @@ event Framework::ThreadStartWorkerEvent
 {
 };
 
-event SwrApi::DrawInfoEvent
+///@brief Used as a helper event to indicate end of frame. Does not guarantee to capture end of frame on all APIs
+event ApiSwr::FrameEndEvent
 {
-    uint32_t drawId;
-    AR_DRAW_TYPE type;
-    uint32_t topology;
-    uint32_t numVertices;
-    uint32_t numIndices;
-    int32_t  indexOffset;
-    int32_t  baseVertex;
-    uint32_t numInstances;
-    uint32_t startInstance;
-    uint32_t tsEnable;
-    uint32_t gsEnable;
-    uint32_t soEnable;
-    uint32_t soTopology;
-    uint32_t splitId; // Split draw count or id.
+    uint32_t frameId;       // current frame id
+    uint32_t nextDrawId;    // next draw id (always incremental - does not reset)
 };
 
-event SwrApi::DispatchEvent
+///@brief Synchronization event.
+event ApiSwr::SwrSyncEvent
 {
     uint32_t drawId;
-    uint32_t threadGroupCountX;
-    uint32_t threadGroupCountY;
-    uint32_t threadGroupCountZ;
 };
 
-event SwrApi::FrameEndEvent
+///@brief Invalidate hot tiles (i.e. tile cache)
+event ApiSwr::SwrInvalidateTilesEvent
 {
-    uint32_t frameId;
-    uint32_t nextDrawId;
+    uint32_t drawId;
 };
 
-///@brief API Stat: Synchonization event.
-event SwrApi::SwrSyncEvent
+///@brief Invalidate and discard hot tiles within pixel region
+event ApiSwr::SwrDiscardRectEvent
 {
     uint32_t drawId;
 };
 
-///@brief API Stat: Invalidate hot tiles (i.e. tile cache)
-event SwrApi::SwrInvalidateTilesEvent
+///@brief Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache)
+event ApiSwr::SwrStoreTilesEvent
 {
     uint32_t drawId;
 };
 
-///@brief API Stat: Invalidate and discard hot tiles within pixel region
-event SwrApi::SwrDiscardRectEvent
+event PipelineStats::DrawInfoEvent
 {
     uint32_t drawId;
+    AR_DRAW_TYPE type;  // type of draw (indexed, instanced, etc)
+    uint32_t topology;  // topology of draw
+    uint32_t numVertices; // number of vertices for draw
+    uint32_t numIndices; // number of indices for draw
+    int32_t  indexOffset; // offset into index buffer
+    int32_t  baseVertex; // which vertex to start with
+    uint32_t numInstances; // number of instances to draw
+    uint32_t startInstance; // which instance to start fetching
+    uint32_t tsEnable; // tessellation enabled
+    uint32_t gsEnable; // geometry shader enabled
+    uint32_t soEnable; // stream-out enabled
+    uint32_t soTopology; // topology of stream-out
+    uint32_t splitId; // split draw count or id
 };
 
-///@brief API Stat: Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache)
-event SwrApi::SwrStoreTilesEvent
+event PipelineStats::DispatchEvent
 {
     uint32_t drawId;
+    uint32_t threadGroupCountX; // num thread groups in X dimension
+    uint32_t threadGroupCountY; // num thread groups in Y dimension
+    uint32_t threadGroupCountZ; // num thread groups in Z dimension
 };
 
-event Pipeline::FrontendStatsEvent
+event PipelineStats::FrontendStatsEvent
 {
     uint32_t drawId;
-    uint64_t counter IaVertices;
-    uint64_t counter IaPrimitives;
-    uint64_t counter VsInvocations;
-    uint64_t counter HsInvocations;
-    uint64_t counter DsInvocations;
-    uint64_t counter GsInvocations;
-    uint64_t counter GsPrimitives;
-    uint64_t counter CInvocations;
-    uint64_t counter CPrimitives;
-    uint64_t counter SoPrimStorageNeeded0;
-    uint64_t counter SoPrimStorageNeeded1;
-    uint64_t counter SoPrimStorageNeeded2;
-    uint64_t counter SoPrimStorageNeeded3;
-    uint64_t counter SoNumPrimsWritten0;
-    uint64_t counter SoNumPrimsWritten1;
-    uint64_t counter SoNumPrimsWritten2;
-    uint64_t counter SoNumPrimsWritten3;
+    uint64_t IaVertices;
+    uint64_t IaPrimitives;
+    uint64_t VsInvocations;
+    uint64_t HsInvocations;
+    uint64_t DsInvocations;
+    uint64_t GsInvocations;
+    uint64_t GsPrimitives;
+    uint64_t CInvocations;
+    uint64_t CPrimitives;
+    uint64_t SoPrimStorageNeeded0;
+    uint64_t SoPrimStorageNeeded1;
+    uint64_t SoPrimStorageNeeded2;
+    uint64_t SoPrimStorageNeeded3;
+    uint64_t SoNumPrimsWritten0;
+    uint64_t SoNumPrimsWritten1;
+    uint64_t SoNumPrimsWritten2;
+    uint64_t SoNumPrimsWritten3;
 };
 
-event Pipeline::BackendStatsEvent
+event PipelineStats::BackendStatsEvent
 {
     uint32_t drawId;
-    uint64_t counter DepthPassCount;
-    uint64_t counter PsInvocations;
-    uint64_t counter CsInvocations;
+    uint64_t DepthPassCount;
+    uint64_t PsInvocations;
+    uint64_t CsInvocations;
 
 };
 
-event Pipeline::EarlyZSingleSample
+event PipelineStats::EarlyZSingleSample
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateZSingleSample
+event PipelineStats::LateZSingleSample
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyStencilSingleSample
+event PipelineStats::EarlyStencilSingleSample
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateStencilSingleSample
+event PipelineStats::LateStencilSingleSample
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyZSampleRate
+event PipelineStats::EarlyZSampleRate
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateZSampleRate
+event PipelineStats::LateZSampleRate
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyStencilSampleRate
+event PipelineStats::EarlyStencilSampleRate
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateStencilSampleRate
+event PipelineStats::LateStencilSampleRate
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
 // Total Early-Z counts, SingleSample and SampleRate
-event Pipeline::EarlyZ
+event PipelineStats::EarlyZ
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
 // Total LateZ counts, SingleSample and SampleRate
-event Pipeline::LateZ
+event PipelineStats::LateZ
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
 // Total EarlyStencil counts, SingleSample and SampleRate
-event Pipeline::EarlyStencil
+event PipelineStats::EarlyStencil
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
 // Total LateStencil counts, SingleSample and SampleRate
-event Pipeline::LateStencil
+event PipelineStats::LateStencil
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyZNullPS
+event PipelineStats::EarlyZNullPS
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyStencilNullPS
+event PipelineStats::EarlyStencilNullPS
 {
     uint32_t drawId; 
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyZPixelRate
+event PipelineStats::EarlyZPixelRate
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateZPixelRate
+event PipelineStats::LateZPixelRate
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
 
-event Pipeline::EarlyOmZ
+event PipelineStats::EarlyOmZ
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::EarlyOmStencil
+event PipelineStats::EarlyOmStencil
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateOmZ
+event PipelineStats::LateOmZ
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::LateOmStencil
+event PipelineStats::LateOmStencil
 {
     uint32_t drawId;
-    uint64_t counter passCount;
-    uint64_t counter failCount;
+    uint64_t passCount;
+    uint64_t failCount;
 };
 
-event Pipeline::GSInputPrims
+event PipelineStats::GSInputPrims
 {
     uint32_t drawId;
-    uint64_t counter inputPrimCount;
+    uint64_t inputPrimCount;
 };
 
-event Pipeline::GSPrimsGen
+event PipelineStats::GSPrimsGen
 {
     uint32_t drawId;
-    uint64_t counter primGeneratedCount;
+    uint64_t primGeneratedCount;
 };
 
-event Pipeline::GSVertsInput
+event PipelineStats::GSVertsInput
 {
     uint32_t drawId;
-    uint64_t counter vertsInput;
+    uint64_t vertsInput;
 };
 
-event Pipeline::TessPrims
+event PipelineStats::TessPrims
 {
     uint32_t drawId;
-    uint64_t counter primCount;
+    uint64_t primCount;
 };
 
-event Pipeline::RasterTiles
+event PipelineStats::RasterTiles
 {
     uint32_t drawId;
-    uint32_t counter rastTileCount;
+    uint32_t rastTileCount;
 };
 
-event Pipeline::ClipperEvent
+event PipelineStats::ClipperEvent
 {
     uint32_t drawId;
-    uint32_t counter trivialRejectCount;
-    uint32_t counter trivialAcceptCount;
-    uint32_t counter mustClipCount;
+    uint32_t trivialRejectCount;
+    uint32_t trivialAcceptCount;
+    uint32_t mustClipCount;
 };
 
-event Pipeline::CullEvent
+event PipelineStats::CullEvent
 {
     uint32_t drawId;
-    uint64_t counter backfacePrimCount;
-    uint64_t counter degeneratePrimCount;
+    uint64_t backfacePrimCount;
+    uint64_t degeneratePrimCount;
 };
 
-event Pipeline::AlphaEvent
+event PipelineStats::AlphaEvent
 {
     uint32_t drawId;
-    uint32_t counter alphaTestCount;
-    uint32_t counter alphaBlendCount;
+    uint32_t alphaTestCount;
+    uint32_t alphaBlendCount;
 };
 
-event Shader::VSInfo
+event ShaderStats::VSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
-event Shader::HSInfo
+event ShaderStats::HSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
-event Shader::DSInfo
+event ShaderStats::DSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
-event Shader::GSInfo
+event ShaderStats::GSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 
 };
 
-event Shader::PSInfo
+event ShaderStats::PSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
-event Shader::CSInfo
+event ShaderStats::CSInfo
 {
     uint32_t drawId;
-    uint32_t counter numInstExecuted;
-    uint32_t counter numSampleExecuted;
-    uint32_t counter numSampleLExecuted;
-    uint32_t counter numSampleBExecuted;
-    uint32_t counter numSampleCExecuted;
-    uint32_t counter numSampleCLZExecuted;
-    uint32_t counter numSampleCDExecuted;
-    uint32_t counter numGather4Executed;
-    uint32_t counter numGather4CExecuted;
-    uint32_t counter numGather4CPOExecuted;
-    uint32_t counter numGather4CPOCExecuted;
-    uint32_t counter numLodExecuted;
-};
-
-event SWTagApi::SWTagEndFrameEvent
-{
-       uint64_t frameCount;
-       uint32_t renderpassCount;
-       uint32_t drawOrDispatchCount;
-       uint32_t drawCount;
-       uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagRenderpassEvent
-{
-       uint64_t frameCount;
-       uint32_t renderpassCount;
-       uint32_t drawOrDispatchCount;
-       uint32_t drawCount;
-       uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagDrawEvent
-{
-       uint64_t frameCount;
-       uint32_t renderpassCount;
-       uint32_t drawOrDispatchCount;
-       uint32_t drawCount;
-       uint32_t dispatchCount;
+    uint32_t numInstExecuted;
+    uint32_t numSampleExecuted;
+    uint32_t numSampleLExecuted;
+    uint32_t numSampleBExecuted;
+    uint32_t numSampleCExecuted;
+    uint32_t numSampleCLZExecuted;
+    uint32_t numSampleCDExecuted;
+    uint32_t numGather4Executed;
+    uint32_t numGather4CExecuted;
+    uint32_t numGather4CPOExecuted;
+    uint32_t numGather4CPOCExecuted;
+    uint32_t numLodExecuted;
 };
 
-event SWTagApi::SWTagDispatchEvent
-{
-       uint64_t frameCount;
-       uint32_t renderpassCount;
-       uint32_t drawOrDispatchCount;
-       uint32_t drawCount;
-       uint32_t dispatchCount;
-};
-
-event SWTagApi::SWTagDriverCallEvent
-{
-    char cmd[256];
-};
-
-event SWTag::SWTagFlushEvent
-{
-       uint32_t count;
-    char        reason[256];
-    uint32_t type;
-};
-
-event Memory::MemoryStatsEvent
-{
-    uint32_t drawId;
-    uint64_t baseAddr;
-    uint32_t accessCountRead;
-    uint32_t accessCountWrite;
-    uint32_t totalSizeRead;
-    uint32_t totalSizeWrite;
-    uint64_t tscMin;
-    uint64_t tscMax;
-};
index da4419a..b57d5c4 100644 (file)
 # ArchRast is to not pollute the Rasty code with lots of calculations, etc. that
 # are needed to compute per draw statistics, etc.
 
-event Pipeline::EarlyDepthStencilInfoSingleSample
+event PipelineStats::EarlyDepthStencilInfoSingleSample
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::EarlyDepthStencilInfoSampleRate
+event PipelineStats::EarlyDepthStencilInfoSampleRate
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::EarlyDepthStencilInfoNullPS
+event PipelineStats::EarlyDepthStencilInfoNullPS
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::LateDepthStencilInfoSingleSample
+event PipelineStats::LateDepthStencilInfoSingleSample
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::LateDepthStencilInfoSampleRate
+event PipelineStats::LateDepthStencilInfoSampleRate
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::LateDepthStencilInfoNullPS
+event PipelineStats::LateDepthStencilInfoNullPS
 {
     uint64_t depthPassMask;
     uint64_t stencilPassMask;
     uint64_t coverageMask;
 };
 
-event Pipeline::EarlyDepthInfoPixelRate
+event PipelineStats::EarlyDepthInfoPixelRate
 {
     uint64_t depthPassCount;
     uint64_t activeLanes;
 };
 
 
-event Pipeline::LateDepthInfoPixelRate
+event PipelineStats::LateDepthInfoPixelRate
 {
     uint64_t depthPassCount;
     uint64_t activeLanes;
 };
 
 
-event Pipeline::BackendDrawEndEvent
+event PipelineStats::BackendDrawEndEvent
 {
     uint32_t drawId;
 };
 
-event Pipeline::FrontendDrawEndEvent
+event PipelineStats::FrontendDrawEndEvent
 {
     uint32_t drawId;
 };
@@ -105,18 +105,18 @@ event Memory::MemoryStatsEndEvent
     uint32_t drawId;
 };
 
-event Pipeline::TessPrimCount
+event PipelineStats::TessPrimCount
 {
     uint64_t primCount;
 };
 
-event Pipeline::RasterTileCount
+event PipelineStats::RasterTileCount
 {
     uint32_t drawId;
     uint64_t rasterTiles;
 };
 
-event Pipeline::GSPrimInfo
+event PipelineStats::GSPrimInfo
 {
     uint64_t inputPrimCount;
     uint64_t primGeneratedCount;
@@ -128,14 +128,14 @@ event Pipeline::GSPrimInfo
 // Trivial reject is numInvocations - pop_cnt32(validMask)
 // Trivial accept is validMask & ~clipMask
 // Must clip count is pop_cnt32(clipMask)
-event Pipeline::ClipInfoEvent
+event PipelineStats::ClipInfoEvent
 {
     uint32_t numInvocations;
     uint32_t validMask;
     uint32_t clipMask;
 };
 
-event Pipeline::CullInfoEvent
+event PipelineStats::CullInfoEvent
 {
     uint32_t drawId;
     uint64_t degeneratePrimMask;
@@ -143,14 +143,14 @@ event Pipeline::CullInfoEvent
     uint32_t validMask;
 };
 
-event Pipeline::AlphaInfoEvent
+event PipelineStats::AlphaInfoEvent
 {
     uint32_t drawId;
     uint32_t alphaTestEnable;
     uint32_t alphaBlendEnable;
 };
 
-event SwrApi::DrawInstancedEvent
+event PipelineStats::DrawInstancedEvent
 {
     uint32_t drawId;
     uint32_t topology;
@@ -165,7 +165,7 @@ event SwrApi::DrawInstancedEvent
     uint32_t splitId; // Split draw count or id.
 };
 
-event SwrApi::DrawIndexedInstancedEvent
+event PipelineStats::DrawIndexedInstancedEvent
 {
     uint32_t drawId;
     uint32_t topology;
@@ -181,32 +181,32 @@ event SwrApi::DrawIndexedInstancedEvent
     uint32_t splitId; // Split draw count or id.
 };
 
-event Shader::VSStats
+event ShaderStats::VSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
 
-event Shader::HSStats
+event ShaderStats::HSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
 
-event Shader::DSStats
+event ShaderStats::DSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
 
-event Shader::GSStats
+event ShaderStats::GSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
 
-event Shader::PSStats
+event ShaderStats::PSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
 
-event Shader::CSStats
+event ShaderStats::CSStats
 {
     HANDLE hStats;      // SWR_SHADER_STATS
 };
\ No newline at end of file
index 44f2af0..140a39b 100644 (file)
@@ -39,14 +39,22 @@ def parse_event_fields(lines, idx, event_dict):
         line = lines[idx].rstrip()
         idx += 1
 
-        match = re.match(r'(\s*)([\w\*]+)(\s+)(counter\s+)*([\w]+)(\[\d+\])*', line)
+        # ex 1: uint32_t    numSampleCLZExecuted; // number of sample_cl_z instructions executed
+        # ex 2: char        reason[256]; // size of reason
+        match = re.match(r'^(\s*)([\w\*]+)(\s+)([\w]+)(\[\d+\])*;\s*(\/\/.*)*$', line)
+        # group 1 -
+        # group 2 type
+        # group 3 -
+        # group 4 name
+        # group 5 [array size]
+        # group 6 //comment
 
         if match:
             field = {
                 "type": match.group(2),
-                "name": match.group(5),
-                "size": int(match.group(6)[1:-1]) if match.group(6) else 1,
-                "counter": True if match.group(4) else False
+                "name": match.group(4),
+                "size": int(match.group(5)[1:-1]) if match.group(5) else 1,
+                "desc": match.group(6)[2:].strip() if match.group(6) else "",
             }
             fields.append(field)
 
@@ -87,6 +95,53 @@ def parse_protos(files, verbose=False):
     """
         Parses a proto file and returns a dictionary of event definitions
     """
+
+    # Protos structure:
+    #
+    # {
+    #   "events": {
+    #     "defs": {     // dict of event definitions where keys are "group_name::event_name"
+    #       ...,
+    #       "ApiStat::DrawInfoEvent": {
+    #         "id": 3,
+    #         "group": "ApiStat",
+    #         "name": "DrawInfoEvent",  // name of event without 'group_name::' prefix
+    #         "desc": "",
+    #         "fields": [
+    #           {
+    #             "type": "uint32_t",
+    #             "name": "drawId",
+    #             "size": 1,
+    #             "desc": "",
+    #           },
+    #           ...
+    #         ]
+    #       },
+    #       ...
+    #     },
+    #     "groups": {   // dict of groups with lists of event keys
+    #       "ApiStat": [
+    #         "ApiStat::DispatchEvent",
+    #         "ApiStat::DrawInfoEvent",
+    #         ...
+    #       ],
+    #       "Framework": [
+    #         "Framework::ThreadStartApiEvent",
+    #         "Framework::ThreadStartWorkerEvent",
+    #         ...
+    #       ],
+    #       ...
+    #     },
+    #     "map": {  // map of event ids to match archrast output to event key
+    #       "1": "Framework::ThreadStartApiEvent",
+    #       "2": "Framework::ThreadStartWorkerEvent",
+    #       "3": "ApiStat::DrawInfoEvent",
+    #       ...
+    #     }
+    #   },
+    #   "enums": { ... }    // enums follow similar defs, map (groups?) structure
+    # }
+
     protos = {
         'events': {
             'defs': {},             # event dictionary containing events with their fields
@@ -111,12 +166,29 @@ def parse_protos(files, verbose=False):
 
         with open(filename, 'r') as f:
             lines = f.readlines()
-
+            in_brief = False
+            brief = []
             idx = 0
             while idx < len(lines):
                 line = lines[idx].strip()
                 idx += 1
 
+                # If currently processing a brief, keep processing or change state
+                if in_brief:
+                    match = re.match(r'^\s*\/\/\/\s*(.*)$', line)                   # i.e. "/// more event desc..."
+                    if match:
+                        brief.append(match.group(1).strip())
+                        continue
+                    else:
+                        in_brief = False
+
+                # Match event/enum brief
+                match = re.match(r'^\s*\/\/\/\s*@(brief|breif)\s*(.*)$', line)       # i.e. "///@brief My event desc..."
+                if match:
+                    in_brief = True
+                    brief.append(match.group(2).strip())
+                    continue
+
                 # Match event definition
                 match = re.match(r'event(\s*)(((\w*)::){0,1}(\w+))', line)          # i.e. "event SWTag::CounterEvent"
                 if match:
@@ -124,19 +196,27 @@ def parse_protos(files, verbose=False):
 
                     # Parse event attributes
                     event_key = match.group(2)                                      # i.e. SWTag::CounterEvent
-                    event_group = match.group(4) if match.group(4) else ""      # i.e. SWTag
+                    event_group = match.group(4) if match.group(4) else ""          # i.e. SWTag
                     event_name = match.group(5)                                     # i.e. CounterEvent
 
                     # Define event attributes
                     event = {
                         'id': event_id,
                         'group': event_group,
-                        'name': event_name
+                        'name': event_name,
+                        'desc': ' '.join(brief)
                     }
+                    # Add period at end of event desc if necessary
+                    if event["desc"] and event["desc"][-1] != '.':
+                        event["desc"] += '.'
+
+                    # Reset brief
+                    brief = []
 
                     # Now add event fields
                     idx = parse_event_fields(lines, idx, event)
 
+                    # Register event and mapping
                     protos['events']['defs'][event_key] = event
                     protos['events']['map'][event_id] = event_key
 
@@ -152,12 +232,20 @@ def parse_protos(files, verbose=False):
 
                     # Define enum attr
                     enum = {
-                        'name': enum_name
+                        'name': enum_name,
+                        'desc': ' '.join(brief)
                     }
+                    # Add period at end of enum desc if necessary
+                    if enum["desc"] and enum["desc"][-1] != '.':
+                        enum["desc"] += '.'
+
+                    # Reset brief
+                    brief = []
 
                     # Now add enum fields
                     idx = parse_enums(lines, idx, enum)
 
+                    # Register enum and mapping
                     protos['enums']['defs'][enum_name] = enum
                     protos['enums']['map'][enum_id] = enum_name
 
@@ -174,10 +262,6 @@ def parse_protos(files, verbose=False):
     return protos
 
 
-def get_sorted_protos(protos):
-    protos["groups"]
-
-
 def main():
 
     # Parse args...
index 351587a..75eae35 100644 (file)
@@ -315,6 +315,34 @@ KNOBS = [
         'category'  : 'perf_adv',
     }],
 
+    ['AR_ENABLE_PIPELINE_STATS', {
+        'type'      : 'bool',
+        'default'   : 'true',
+        'desc'      : ['Enable pipeline stats when using Archrast'],
+        'category'  : 'archrast',
+    }],
+
+    ['AR_ENABLE_SHADER_STATS', {
+        'type'      : 'bool',
+        'default'   : 'true',
+        'desc'      : ['Enable shader stats when using Archrast'],
+        'category'  : 'archrast',
+    }],
+
+    ['AR_ENABLE_SWTAG_DATA', {
+        'type'      : 'bool',
+        'default'   : 'false',
+        'desc'      : ['Enable SWTag data when using Archrast'],
+        'category'  : 'archrast',
+    }],
+
+    ['AR_ENABLE_SWR_EVENTS', {
+        'type'      : 'bool',
+        'default'   : 'true',
+        'desc'      : ['Enable internal SWR events when using Archrast'],
+        'category'  : 'archrast',
+    }],
+
     ['AR_ENABLE_PIPELINE_EVENTS', {
         'type'      : 'bool',
         'default'   : 'true',
index 8079b0e..3ef99da 100644 (file)
 #include "common/os.h"
 #include "core/state.h"
 
-<% always_enabled_knob_groups = ['', 'Framework', 'SWTagApi', 'SwrApi'] %>
+<%
+    always_enabled_knob_groups = ['Framework', 'SWTagFramework', 'ApiSwr']
+    group_knob_remap_table = {
+        "ShaderStats": "KNOB_AR_ENABLE_SHADER_STATS",
+        "PipelineStats" : "KNOB_AR_ENABLE_PIPELINE_STATS",
+        "SWTagData" : "KNOB_AR_ENABLE_SWTAG_DATA",
+ }
+%>
 namespace ArchRast
 {
 <% sorted_enums = sorted(protos['enums']['defs']) %>
@@ -57,10 +64,12 @@ namespace ArchRast
     //////////////////////////////////////////////////////////////////////////
     struct Event
     {
+        const uint32_t eventId = {0xFFFFFFFF};
         Event() {}
         virtual ~Event() {}
 
         virtual bool IsEnabled() const { return true; };
+        virtual const uint32_t GetEventId() const = 0;
         virtual void Accept(EventHandler* pHandler) const = 0;
     };
 
@@ -94,6 +103,7 @@ namespace ArchRast
     struct ${event['name']} : Event
     {<%
         fields = event['fields'] %>
+        const uint32_t eventId = {${ event['id'] }};
         ${event['name']}Data data;
 
         // Constructor
@@ -135,8 +145,14 @@ namespace ArchRast
         }
 
         virtual void Accept(EventHandler* pHandler) const;
+        inline const uint32_t GetEventId() const { return eventId; }
         % if group not in always_enabled_knob_groups:
-        <%  group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS' %>
+        <% 
+            if group in group_knob_remap_table:
+                group_knob_define = group_knob_remap_table[group]
+            else:
+                group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS'
+        %>
         bool IsEnabled() const
         {
             static const bool IsEventEnabled = true;    // TODO: Replace with knob for each event
index 3f85c88..6e9fdb5 100644 (file)
@@ -147,9 +147,9 @@ namespace ArchRast
         virtual void Handle(const ${event['name']}& event)
         {
 % if event['num_fields'] == 0:
-            Write(${event['id']}, (char*)&event.data, 0);
+            Write(event.eventId, (char*)&event.data, 0);
 % else:
-            Write(${event['id']}, (char*)&event.data, sizeof(event.data));
+            Write(event.eventId, (char*)&event.data, sizeof(event.data));
 % endif
         }
 %       endfor
index ba1ad5e..1ef83ad 100644 (file)
@@ -67,7 +67,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text)
 #else
     {
         // unix style variable replacement
-        static std::regex env("\\$\\{([^}]+)\\}");
+        static std::regex env("\\$\\{([^}]+?)\\}");
         std::smatch       match;
         while (std::regex_search(text, match, env))
         {
@@ -79,7 +79,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text)
     }
     {
         // win32 style variable replacement
-        static std::regex env("\\%([^}]+)\\%");
+        static std::regex env("%([^%]+?)%");
         std::smatch       match;
         while (std::regex_search(text, match, env))
         {
index 3090a24..a0ddd96 100644 (file)
@@ -458,8 +458,6 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId,
     {
         ExecuteCallbacks(pContext, workerId, pDC);
 
-        // Report accumulated memory access stats
-        AR_EVENT(MemoryStatsEndEvent(pDC->drawId));
 
         // Cleanup memory allocations
         pDC->pArena->Reset(true);
index d3c732a..e0bb75c 100644 (file)
@@ -25,6 +25,7 @@
  * @brief Include file for llvm passes
  *
  ******************************************************************************/
+#pragma once
 
 #include "JitManager.h"
 #include "builder.h"