From 387599a66181958e28483b8819e647d60b7158e8 Mon Sep 17 00:00:00 2001 From: Jan Zielinski Date: Thu, 1 Aug 2019 14:30:58 +0200 Subject: [PATCH] swr/rasterizer: Refactor events collection mechanism Several improvements and cleanups in events and statistics mechanisms Reviewed-by: Alok Hota --- .../drivers/swr/rasterizer/archrast/archrast.cpp | 126 ------ .../drivers/swr/rasterizer/archrast/events.proto | 480 +++++++++------------ .../swr/rasterizer/archrast/events_private.proto | 48 +-- .../drivers/swr/rasterizer/codegen/gen_archrast.py | 108 ++++- .../drivers/swr/rasterizer/codegen/knob_defs.py | 28 ++ .../rasterizer/codegen/templates/gen_ar_event.hpp | 20 +- .../codegen/templates/gen_ar_eventhandlerfile.hpp | 4 +- .../swr/rasterizer/codegen/templates/gen_knobs.cpp | 4 +- .../drivers/swr/rasterizer/core/threads.cpp | 2 - .../swr/rasterizer/jitter/functionpasses/passes.h | 1 + 10 files changed, 382 insertions(+), 439 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 03df614..c1d3f2d 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -87,81 +87,6 @@ namespace ArchRast uint32_t alphaBlendCount = 0; }; - struct MemoryStats - { - struct MemoryTrackerKey - { - uint64_t address; - uint64_t mask; - }; - - struct MemoryTrackerData - { - uint32_t accessCountRead; - uint32_t accessCountWrite; - uint32_t totalSizeRead; - uint32_t totalSizeWrite; - uint64_t tscMin; - uint64_t tscMax; - }; - - struct AddressRangeComparator - { - bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const - { - return (a.address & a.mask) < (b.address & b.mask); - } - }; - - typedef std::map MemoryTrackerMap; - MemoryTrackerMap trackedMemory = {}; - - void TrackMemoryAccess(uint64_t address, uint64_t addressMask, uint8_t isRead, uint64_t tsc, uint32_t size) - { - MemoryTrackerKey key; - key.address = address; - 
key.mask = addressMask; - - MemoryTrackerMap::iterator i = trackedMemory.lower_bound(key); - if (i != trackedMemory.end() && !(trackedMemory.key_comp()(key, i->first))) - { - // already in map - if (isRead) - { - i->second.accessCountRead++; - i->second.totalSizeRead += size; - } - else - { - i->second.accessCountWrite++; - i->second.totalSizeWrite += size; - } - i->second.tscMax = tsc; - } - else - { - // new entry - MemoryTrackerData data; - if (isRead) - { - data.accessCountRead = 1; - data.totalSizeRead = size; - data.accessCountWrite = 0; - data.totalSizeWrite = 0; - } - else - { - data.accessCountRead = 0; - data.totalSizeRead = 0; - data.accessCountWrite = 1; - data.totalSizeWrite = size; - } - data.tscMin = tsc; - data.tscMax = tsc; - trackedMemory.insert(i, MemoryTrackerMap::value_type(key, data)); - } - } - }; ////////////////////////////////////////////////////////////////////////// /// @brief Event handler that handles API thread events. This is shared @@ -258,17 +183,6 @@ namespace ArchRast EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) { memset(mShaderStats, 0, sizeof(mShaderStats)); - - // compute address mask for memory tracking - mAddressMask = 0; - uint64_t addressRangeBytes = 4096; - while (addressRangeBytes > 0) - { - mAddressMask = (mAddressMask << 1) | 1; - addressRangeBytes = addressRangeBytes >> 1; - } - mMemGranularity = mAddressMask + 1; - mAddressMask = ~mAddressMask; } virtual void Handle(const EarlyDepthStencilInfoSingleSample& event) @@ -674,42 +588,6 @@ namespace ArchRast mGS = {}; } - virtual void Handle(const MemoryAccessEvent& event) - { - uint64_t trackAddr = event.data.ptr; - uint64_t nextAddr = (trackAddr & mAddressMask); - uint32_t sizeTracked = 0; - - while (sizeTracked < event.data.size) - { - nextAddr += mMemGranularity; - uint32_t size = nextAddr - trackAddr; - size = std::min(event.data.size, size); - mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, 
size); - sizeTracked += size; - trackAddr = nextAddr; - } - } - - virtual void Handle(const MemoryStatsEndEvent& event) - { - MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin(); - while (i != mMemoryStats.trackedMemory.end()) - { - MemoryStatsEvent mse(event.data.drawId, - i->first.address & mAddressMask, - i->second.accessCountRead, - i->second.accessCountWrite, - i->second.totalSizeRead, - i->second.totalSizeWrite, - i->second.tscMin, - i->second.tscMax); - EventHandlerFile::Handle(mse); - i++; - } - mMemoryStats.trackedMemory.clear(); - } - virtual void Handle(const GSPrimInfo& event) { mGS.inputPrimCount += event.data.inputPrimCount; @@ -756,10 +634,6 @@ namespace ArchRast SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES]; - MemoryStats mMemoryStats = {}; - uint64_t mAddressMask = 0; - uint64_t mMemGranularity = 0; - }; static EventManager* FromHandle(HANDLE hThreadContext) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 8a6093f..a530893 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -37,449 +37,391 @@ event Framework::ThreadStartWorkerEvent { }; -event SwrApi::DrawInfoEvent +///@brief Used as a helper event to indicate end of frame. Does not guarantee to capture end of frame on all APIs +event ApiSwr::FrameEndEvent { - uint32_t drawId; - AR_DRAW_TYPE type; - uint32_t topology; - uint32_t numVertices; - uint32_t numIndices; - int32_t indexOffset; - int32_t baseVertex; - uint32_t numInstances; - uint32_t startInstance; - uint32_t tsEnable; - uint32_t gsEnable; - uint32_t soEnable; - uint32_t soTopology; - uint32_t splitId; // Split draw count or id. + uint32_t frameId; // current frame id + uint32_t nextDrawId; // next draw id (always incremental - does not reset) }; -event SwrApi::DispatchEvent +///@brief Synchronization event. 
+event ApiSwr::SwrSyncEvent { uint32_t drawId; - uint32_t threadGroupCountX; - uint32_t threadGroupCountY; - uint32_t threadGroupCountZ; }; -event SwrApi::FrameEndEvent +///@brief Invalidate hot tiles (i.e. tile cache) +event ApiSwr::SwrInvalidateTilesEvent { - uint32_t frameId; - uint32_t nextDrawId; + uint32_t drawId; }; -///@brief API Stat: Synchonization event. -event SwrApi::SwrSyncEvent +///@brief Invalidate and discard hot tiles within pixel region +event ApiSwr::SwrDiscardRectEvent { uint32_t drawId; }; -///@brief API Stat: Invalidate hot tiles (i.e. tile cache) -event SwrApi::SwrInvalidateTilesEvent +///@brief Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache) +event ApiSwr::SwrStoreTilesEvent { uint32_t drawId; }; -///@brief API Stat: Invalidate and discard hot tiles within pixel region -event SwrApi::SwrDiscardRectEvent +event PipelineStats::DrawInfoEvent { uint32_t drawId; + AR_DRAW_TYPE type; // type of draw (indexed, instanced, etc) + uint32_t topology; // topology of draw + uint32_t numVertices; // number of vertices for draw + uint32_t numIndices; // number of indices for draw + int32_t indexOffset; // offset into index buffer + int32_t baseVertex; // which vertex to start with + uint32_t numInstances; // number of instances to draw + uint32_t startInstance; // which instance to start fetching + uint32_t tsEnable; // tessellation enabled + uint32_t gsEnable; // geometry shader enabled + uint32_t soEnable; // stream-out enabled + uint32_t soTopology; // topology of stream-out + uint32_t splitId; // split draw count or id }; -///@brief API Stat: Flush tiles out to memory that is typically owned by driver (e.g. 
Flush RT cache) -event SwrApi::SwrStoreTilesEvent +event PipelineStats::DispatchEvent { uint32_t drawId; + uint32_t threadGroupCountX; // num thread groups in X dimension + uint32_t threadGroupCountY; // num thread groups in Y dimension + uint32_t threadGroupCountZ; // num thread groups in Z dimension }; -event Pipeline::FrontendStatsEvent +event PipelineStats::FrontendStatsEvent { uint32_t drawId; - uint64_t counter IaVertices; - uint64_t counter IaPrimitives; - uint64_t counter VsInvocations; - uint64_t counter HsInvocations; - uint64_t counter DsInvocations; - uint64_t counter GsInvocations; - uint64_t counter GsPrimitives; - uint64_t counter CInvocations; - uint64_t counter CPrimitives; - uint64_t counter SoPrimStorageNeeded0; - uint64_t counter SoPrimStorageNeeded1; - uint64_t counter SoPrimStorageNeeded2; - uint64_t counter SoPrimStorageNeeded3; - uint64_t counter SoNumPrimsWritten0; - uint64_t counter SoNumPrimsWritten1; - uint64_t counter SoNumPrimsWritten2; - uint64_t counter SoNumPrimsWritten3; + uint64_t IaVertices; + uint64_t IaPrimitives; + uint64_t VsInvocations; + uint64_t HsInvocations; + uint64_t DsInvocations; + uint64_t GsInvocations; + uint64_t GsPrimitives; + uint64_t CInvocations; + uint64_t CPrimitives; + uint64_t SoPrimStorageNeeded0; + uint64_t SoPrimStorageNeeded1; + uint64_t SoPrimStorageNeeded2; + uint64_t SoPrimStorageNeeded3; + uint64_t SoNumPrimsWritten0; + uint64_t SoNumPrimsWritten1; + uint64_t SoNumPrimsWritten2; + uint64_t SoNumPrimsWritten3; }; -event Pipeline::BackendStatsEvent +event PipelineStats::BackendStatsEvent { uint32_t drawId; - uint64_t counter DepthPassCount; - uint64_t counter PsInvocations; - uint64_t counter CsInvocations; + uint64_t DepthPassCount; + uint64_t PsInvocations; + uint64_t CsInvocations; }; -event Pipeline::EarlyZSingleSample +event PipelineStats::EarlyZSingleSample { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event 
Pipeline::LateZSingleSample +event PipelineStats::LateZSingleSample { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyStencilSingleSample +event PipelineStats::EarlyStencilSingleSample { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateStencilSingleSample +event PipelineStats::LateStencilSingleSample { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyZSampleRate +event PipelineStats::EarlyZSampleRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateZSampleRate +event PipelineStats::LateZSampleRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyStencilSampleRate +event PipelineStats::EarlyStencilSampleRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateStencilSampleRate +event PipelineStats::LateStencilSampleRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; // Total Early-Z counts, SingleSample and SampleRate -event Pipeline::EarlyZ +event PipelineStats::EarlyZ { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; // Total LateZ counts, SingleSample and SampleRate -event Pipeline::LateZ +event PipelineStats::LateZ { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; // Total EarlyStencil counts, SingleSample and SampleRate -event Pipeline::EarlyStencil +event 
PipelineStats::EarlyStencil { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; // Total LateStencil counts, SingleSample and SampleRate -event Pipeline::LateStencil +event PipelineStats::LateStencil { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyZNullPS +event PipelineStats::EarlyZNullPS { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyStencilNullPS +event PipelineStats::EarlyStencilNullPS { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyZPixelRate +event PipelineStats::EarlyZPixelRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateZPixelRate +event PipelineStats::LateZPixelRate { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyOmZ +event PipelineStats::EarlyOmZ { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::EarlyOmStencil +event PipelineStats::EarlyOmStencil { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateOmZ +event PipelineStats::LateOmZ { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::LateOmStencil +event PipelineStats::LateOmStencil { uint32_t drawId; - uint64_t counter passCount; - uint64_t counter failCount; + uint64_t passCount; + uint64_t failCount; }; -event Pipeline::GSInputPrims +event 
PipelineStats::GSInputPrims { uint32_t drawId; - uint64_t counter inputPrimCount; + uint64_t inputPrimCount; }; -event Pipeline::GSPrimsGen +event PipelineStats::GSPrimsGen { uint32_t drawId; - uint64_t counter primGeneratedCount; + uint64_t primGeneratedCount; }; -event Pipeline::GSVertsInput +event PipelineStats::GSVertsInput { uint32_t drawId; - uint64_t counter vertsInput; + uint64_t vertsInput; }; -event Pipeline::TessPrims +event PipelineStats::TessPrims { uint32_t drawId; - uint64_t counter primCount; + uint64_t primCount; }; -event Pipeline::RasterTiles +event PipelineStats::RasterTiles { uint32_t drawId; - uint32_t counter rastTileCount; + uint32_t rastTileCount; }; -event Pipeline::ClipperEvent +event PipelineStats::ClipperEvent { uint32_t drawId; - uint32_t counter trivialRejectCount; - uint32_t counter trivialAcceptCount; - uint32_t counter mustClipCount; + uint32_t trivialRejectCount; + uint32_t trivialAcceptCount; + uint32_t mustClipCount; }; -event Pipeline::CullEvent +event PipelineStats::CullEvent { uint32_t drawId; - uint64_t counter backfacePrimCount; - uint64_t counter degeneratePrimCount; + uint64_t backfacePrimCount; + uint64_t degeneratePrimCount; }; -event Pipeline::AlphaEvent +event PipelineStats::AlphaEvent { uint32_t drawId; - uint32_t counter alphaTestCount; - uint32_t counter alphaBlendCount; + uint32_t alphaTestCount; + uint32_t alphaBlendCount; }; -event Shader::VSInfo +event ShaderStats::VSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + 
uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; -event Shader::HSInfo +event ShaderStats::HSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; -event Shader::DSInfo +event ShaderStats::DSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + 
uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; -event Shader::GSInfo +event ShaderStats::GSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; -event Shader::PSInfo +event ShaderStats::PSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; 
-event Shader::CSInfo +event ShaderStats::CSInfo { uint32_t drawId; - uint32_t counter numInstExecuted; - uint32_t counter numSampleExecuted; - uint32_t counter numSampleLExecuted; - uint32_t counter numSampleBExecuted; - uint32_t counter numSampleCExecuted; - uint32_t counter numSampleCLZExecuted; - uint32_t counter numSampleCDExecuted; - uint32_t counter numGather4Executed; - uint32_t counter numGather4CExecuted; - uint32_t counter numGather4CPOExecuted; - uint32_t counter numGather4CPOCExecuted; - uint32_t counter numLodExecuted; -}; - -event SWTagApi::SWTagEndFrameEvent -{ - uint64_t frameCount; - uint32_t renderpassCount; - uint32_t drawOrDispatchCount; - uint32_t drawCount; - uint32_t dispatchCount; -}; - -event SWTagApi::SWTagRenderpassEvent -{ - uint64_t frameCount; - uint32_t renderpassCount; - uint32_t drawOrDispatchCount; - uint32_t drawCount; - uint32_t dispatchCount; -}; - -event SWTagApi::SWTagDrawEvent -{ - uint64_t frameCount; - uint32_t renderpassCount; - uint32_t drawOrDispatchCount; - uint32_t drawCount; - uint32_t dispatchCount; + uint32_t numInstExecuted; + uint32_t numSampleExecuted; + uint32_t numSampleLExecuted; + uint32_t numSampleBExecuted; + uint32_t numSampleCExecuted; + uint32_t numSampleCLZExecuted; + uint32_t numSampleCDExecuted; + uint32_t numGather4Executed; + uint32_t numGather4CExecuted; + uint32_t numGather4CPOExecuted; + uint32_t numGather4CPOCExecuted; + uint32_t numLodExecuted; }; -event SWTagApi::SWTagDispatchEvent -{ - uint64_t frameCount; - uint32_t renderpassCount; - uint32_t drawOrDispatchCount; - uint32_t drawCount; - uint32_t dispatchCount; -}; - -event SWTagApi::SWTagDriverCallEvent -{ - char cmd[256]; -}; - -event SWTag::SWTagFlushEvent -{ - uint32_t count; - char reason[256]; - uint32_t type; -}; - -event Memory::MemoryStatsEvent -{ - uint32_t drawId; - uint64_t baseAddr; - uint32_t accessCountRead; - uint32_t accessCountWrite; - uint32_t totalSizeRead; - uint32_t totalSizeWrite; - uint64_t tscMin; - uint64_t tscMax; 
-}; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index da4419a..b57d5c4 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -24,68 +24,68 @@ # ArchRast is to not pollute the Rasty code with lots of calculations, etc. that # are needed to compute per draw statistics, etc. -event Pipeline::EarlyDepthStencilInfoSingleSample +event PipelineStats::EarlyDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::EarlyDepthStencilInfoSampleRate +event PipelineStats::EarlyDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::EarlyDepthStencilInfoNullPS +event PipelineStats::EarlyDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::LateDepthStencilInfoSingleSample +event PipelineStats::LateDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::LateDepthStencilInfoSampleRate +event PipelineStats::LateDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::LateDepthStencilInfoNullPS +event PipelineStats::LateDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event Pipeline::EarlyDepthInfoPixelRate +event PipelineStats::EarlyDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event Pipeline::LateDepthInfoPixelRate +event PipelineStats::LateDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event Pipeline::BackendDrawEndEvent +event PipelineStats::BackendDrawEndEvent { uint32_t drawId; }; -event Pipeline::FrontendDrawEndEvent +event 
PipelineStats::FrontendDrawEndEvent { uint32_t drawId; }; @@ -105,18 +105,18 @@ event Memory::MemoryStatsEndEvent uint32_t drawId; }; -event Pipeline::TessPrimCount +event PipelineStats::TessPrimCount { uint64_t primCount; }; -event Pipeline::RasterTileCount +event PipelineStats::RasterTileCount { uint32_t drawId; uint64_t rasterTiles; }; -event Pipeline::GSPrimInfo +event PipelineStats::GSPrimInfo { uint64_t inputPrimCount; uint64_t primGeneratedCount; @@ -128,14 +128,14 @@ event Pipeline::GSPrimInfo // Trivial reject is numInvocations - pop_cnt32(validMask) // Trivial accept is validMask & ~clipMask // Must clip count is pop_cnt32(clipMask) -event Pipeline::ClipInfoEvent +event PipelineStats::ClipInfoEvent { uint32_t numInvocations; uint32_t validMask; uint32_t clipMask; }; -event Pipeline::CullInfoEvent +event PipelineStats::CullInfoEvent { uint32_t drawId; uint64_t degeneratePrimMask; @@ -143,14 +143,14 @@ event Pipeline::CullInfoEvent uint32_t validMask; }; -event Pipeline::AlphaInfoEvent +event PipelineStats::AlphaInfoEvent { uint32_t drawId; uint32_t alphaTestEnable; uint32_t alphaBlendEnable; }; -event SwrApi::DrawInstancedEvent +event PipelineStats::DrawInstancedEvent { uint32_t drawId; uint32_t topology; @@ -165,7 +165,7 @@ event SwrApi::DrawInstancedEvent uint32_t splitId; // Split draw count or id. }; -event SwrApi::DrawIndexedInstancedEvent +event PipelineStats::DrawIndexedInstancedEvent { uint32_t drawId; uint32_t topology; @@ -181,32 +181,32 @@ event SwrApi::DrawIndexedInstancedEvent uint32_t splitId; // Split draw count or id. 
}; -event Shader::VSStats +event ShaderStats::VSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event Shader::HSStats +event ShaderStats::HSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event Shader::DSStats +event ShaderStats::DSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event Shader::GSStats +event ShaderStats::GSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event Shader::PSStats +event ShaderStats::PSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event Shader::CSStats +event ShaderStats::CSStats { HANDLE hStats; // SWR_SHADER_STATS }; \ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py index 44f2af0..140a39b 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py @@ -39,14 +39,22 @@ def parse_event_fields(lines, idx, event_dict): line = lines[idx].rstrip() idx += 1 - match = re.match(r'(\s*)([\w\*]+)(\s+)(counter\s+)*([\w]+)(\[\d+\])*', line) + # ex 1: uint32_t numSampleCLZExecuted; // number of sample_cl_z instructions executed + # ex 2: char reason[256]; // size of reason + match = re.match(r'^(\s*)([\w\*]+)(\s+)([\w]+)(\[\d+\])*;\s*(\/\/.*)*$', line) + # group 1 - + # group 2 type + # group 3 - + # group 4 name + # group 5 [array size] + # group 6 //comment if match: field = { "type": match.group(2), - "name": match.group(5), - "size": int(match.group(6)[1:-1]) if match.group(6) else 1, - "counter": True if match.group(4) else False + "name": match.group(4), + "size": int(match.group(5)[1:-1]) if match.group(5) else 1, + "desc": match.group(6)[2:].strip() if match.group(6) else "", } fields.append(field) @@ -87,6 +95,53 @@ def parse_protos(files, verbose=False): """ Parses a proto file and returns a dictionary of event definitions """ + + # Protos structure: + # + # { + # "events": { + # "defs": { // dict of event definitions where keys are 'group_name::event_name" + 
# ..., + # "ApiStat::DrawInfoEvent": { + # "id": 3, + # "group": "ApiStat", + # "name": "DrawInfoEvent", // name of event without 'group_name::' prefix + # "desc": "", + # "fields": [ + # { + # "type": "uint32_t", + # "name": "drawId", + # "size": 1, + # "desc": "", + # }, + # ... + # ] + # }, + # ... + # }, + # "groups": { // dict of groups with lists of event keys + # "ApiStat": [ + # "ApiStat::DispatchEvent", + # "ApiStat::DrawInfoEvent", + # ... + # ], + # "Framework": [ + # "Framework::ThreadStartApiEvent", + # "Framework::ThreadStartWorkerEvent", + # ... + # ], + # ... + # }, + # "map": { // map of event ids to match archrast output to event key + # "1": "Framework::ThreadStartApiEvent", + # "2": "Framework::ThreadStartWorkerEvent", + # "3": "ApiStat::DrawInfoEvent", + # ... + # } + # }, + # "enums": { ... } // enums follow similar defs, map (groups?) structure + # } + protos = { 'events': { 'defs': {}, # event dictionary containing events with their fields @@ -111,12 +166,29 @@ def parse_protos(files, verbose=False): with open(filename, 'r') as f: lines = f.readlines() - + in_brief = False + brief = [] idx = 0 while idx < len(lines): line = lines[idx].strip() idx += 1 + # If currently processing a brief, keep processing or change state + if in_brief: + match = re.match(r'^\s*\/\/\/\s*(.*)$', line) # i.e. "/// more event desc..." + if match: + brief.append(match.group(1).strip()) + continue + else: + in_brief = False + + # Match event/enum brief + match = re.match(r'^\s*\/\/\/\s*@(brief|breif)\s*(.*)$', line) # i.e. "///@brief My event desc..." + if match: + in_brief = True + brief.append(match.group(2).strip()) + continue + # Match event definition match = re.match(r'event(\s*)(((\w*)::){0,1}(\w+))', line) # i.e. "event SWTag::CounterEvent" if match: @@ -124,19 +196,27 @@ def parse_protos(files, verbose=False): # Parse event attributes event_key = match.group(2) # i.e. SWTag::CounterEvent - event_group = match.group(4) if match.group(4) else "" # i.e. 
SWTag + event_group = match.group(4) if match.group(4) else "" # i.e. SWTag event_name = match.group(5) # i.e. CounterEvent # Define event attributes event = { 'id': event_id, 'group': event_group, - 'name': event_name + 'name': event_name, + 'desc': ' '.join(brief) } + # Add period at end of event desc if necessary + if event["desc"] and event["desc"][-1] != '.': + event["desc"] += '.' + + # Reset brief + brief = [] # Now add event fields idx = parse_event_fields(lines, idx, event) + # Register event and mapping protos['events']['defs'][event_key] = event protos['events']['map'][event_id] = event_key @@ -152,12 +232,20 @@ def parse_protos(files, verbose=False): # Define enum attr enum = { - 'name': enum_name + 'name': enum_name, + 'desc': ' '.join(brief) } + # Add period at end of event desc if necessary + if enum["desc"] and enum["desc"][-1] != '.': + enum["desc"] += '.' + + # Reset brief + brief = [] # Now add enum fields idx = parse_enums(lines, idx, enum) + # Register enum and mapping protos['enums']['defs'][enum_name] = enum protos['enums']['map'][enum_id] = enum_name @@ -174,10 +262,6 @@ def parse_protos(files, verbose=False): return protos -def get_sorted_protos(protos): - protos["groups"] - - def main(): # Parse args... 
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py index 351587a..75eae35 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py @@ -315,6 +315,34 @@ KNOBS = [ 'category' : 'perf_adv', }], + ['AR_ENABLE_PIPELINE_STATS', { + 'type' : 'bool', + 'default' : 'true', + 'desc' : ['Enable pipeline stats when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_SHADER_STATS', { + 'type' : 'bool', + 'default' : 'true', + 'desc' : ['Enable shader stats when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_SWTAG_DATA', { + 'type' : 'bool', + 'default' : 'false', + 'desc' : ['Enable SWTag data when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_SWR_EVENTS', { + 'type' : 'bool', + 'default' : 'true', + 'desc' : ['Enable internal SWR events when using Archrast'], + 'category' : 'archrast', + }], + ['AR_ENABLE_PIPELINE_EVENTS', { 'type' : 'bool', 'default' : 'true', diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp index 8079b0e..3ef99da 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp @@ -36,7 +36,14 @@ #include "common/os.h" #include "core/state.h" -<% always_enabled_knob_groups = ['', 'Framework', 'SWTagApi', 'SwrApi'] %> +<% + always_enabled_knob_groups = ['Framework', 'SWTagFramework', 'ApiSwr'] + group_knob_remap_table = { + "ShaderStats": "KNOB_AR_ENABLE_SHADER_STATS", + "PipelineStats" : "KNOB_AR_ENABLE_PIPELINE_STATS", + "SWTagData" : "KNOB_AR_ENABLE_SWTAG_DATA", + } +%> namespace ArchRast { <% sorted_enums = sorted(protos['enums']['defs']) %> @@ -57,10 +64,12 @@ namespace ArchRast ////////////////////////////////////////////////////////////////////////// 
struct Event { + const uint32_t eventId = {0xFFFFFFFF}; Event() {} virtual ~Event() {} virtual bool IsEnabled() const { return true; }; + virtual const uint32_t GetEventId() const = 0; virtual void Accept(EventHandler* pHandler) const = 0; }; @@ -94,6 +103,7 @@ namespace ArchRast struct ${event['name']} : Event {<% fields = event['fields'] %> + const uint32_t eventId = {${ event['id'] }}; ${event['name']}Data data; // Constructor @@ -135,8 +145,14 @@ namespace ArchRast } virtual void Accept(EventHandler* pHandler) const; + inline const uint32_t GetEventId() const { return eventId; } % if group not in always_enabled_knob_groups: - <% group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS' %> + <% + if group in group_knob_remap_table: + group_knob_define = group_knob_remap_table[group] + else: + group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS' + %> bool IsEnabled() const { static const bool IsEventEnabled = true; // TODO: Replace with knob for each event diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp index 3f85c88..6e9fdb5 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp @@ -147,9 +147,9 @@ namespace ArchRast virtual void Handle(const ${event['name']}& event) { % if event['num_fields'] == 0: - Write(${event['id']}, (char*)&event.data, 0); + Write(event.eventId, (char*)&event.data, 0); % else: - Write(${event['id']}, (char*)&event.data, sizeof(event.data)); + Write(event.eventId, (char*)&event.data, sizeof(event.data)); % endif } % endfor diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp index ba1ad5e..1ef83ad 100644 --- 
a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp @@ -67,7 +67,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text) #else { // unix style variable replacement - static std::regex env("\\$\\{([^}]+)\\}"); + static std::regex env("\\$\\{([^}]+?)\\}"); std::smatch match; while (std::regex_search(text, match, env)) { @@ -79,7 +79,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string& text) } { // win32 style variable replacement - static std::regex env("\\%([^}]+)\\%"); + static std::regex env("%([^%]+?)%"); std::smatch match; while (std::regex_search(text, match, env)) { diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 3090a24..a0ddd96 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -458,8 +458,6 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, { ExecuteCallbacks(pContext, workerId, pDC); - // Report accumulated memory access stats - AR_EVENT(MemoryStatsEndEvent(pDC->drawId)); // Cleanup memory allocations pDC->pArena->Reset(true); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h index d3c732a..e0bb75c 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h @@ -25,6 +25,7 @@ * @brief Include file for llvm passes * ******************************************************************************/ +#pragma once #include "JitManager.h" #include "builder.h" -- 2.7.4