From: George Kyriazis Date: Wed, 7 Mar 2018 01:32:53 +0000 (-0600) Subject: swr/rast: Add some archrast stats X-Git-Tag: upstream/18.1.0~134 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0f6fef96329314262cf03fe91734b0ce9cce3fa0;p=platform%2Fupstream%2Fmesa.git swr/rast: Add some archrast stats Add stats for degenerate and backfacing primitive counts Wire archrast stats for alpha blend and alpha test. pass value to jitter, upon return have archrast event increment a value Reviewed-by: Bruce Cherniak --- diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index 1f87dba..12dfc0e 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -73,6 +73,18 @@ namespace ArchRast uint32_t rasterTiles = 0; }; + struct CullStats + { + uint32_t degeneratePrimCount = 0; + uint32_t backfacePrimCount = 0; + }; + + struct AlphaStats + { + uint32_t alphaTestCount = 0; + uint32_t alphaBlendCount = 0; + }; + ////////////////////////////////////////////////////////////////////////// /// @brief Event handler that handles API thread events. This is shared /// between the API and its caller (e.g. 
driver shim) but typically @@ -280,7 +292,12 @@ namespace ArchRast // Rasterized Subspans EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles)); - //Reset Internal Counters + // Alpha Subspans + EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount)); + + // Primitive Culling + EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount)); + mDSSingleSample = {}; mDSSampleRate = {}; mDSCombined = {}; @@ -288,6 +305,8 @@ namespace ArchRast mDSNullPS = {}; rastStats = {}; + mCullStats = {}; + mAlphaStats = {}; mNeedFlush = false; } @@ -327,6 +346,18 @@ namespace ArchRast rastStats.rasterTiles += event.data.rasterTiles; } + virtual void Handle(const CullInfoEvent& event) + { + mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask)); + mCullStats.backfacePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask)); + } + + virtual void Handle(const AlphaInfoEvent& event) + { + mAlphaStats.alphaTestCount += event.data.alphaTestEnable; + mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; + } + protected: bool mNeedFlush; // Per draw stats @@ -340,6 +371,8 @@ namespace ArchRast TEStats mTS = {}; GSStats mGS = {}; RastStats rastStats = {}; + CullStats mCullStats = {}; + AlphaStats mAlphaStats = {}; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 7d9a68d..deb0373 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -180,6 +180,7 @@ event LateStencilSampleRate uint64_t failCount; }; +// Total Early-Z counts, SingleSample and SampleRate event EarlyZ { uint32_t drawId; @@ -187,6 +188,7 @@ event EarlyZ uint64_t failCount; }; +// Total LateZ counts, SingleSample and SampleRate 
event LateZ { uint32_t drawId; @@ -194,6 +196,7 @@ event LateZ uint64_t failCount; }; +// Total EarlyStencil counts, SingleSample and SampleRate event EarlyStencil { uint32_t drawId; @@ -201,6 +204,7 @@ event EarlyStencil uint64_t failCount; }; +// Total LateStencil counts, SingleSample and SampleRate event LateStencil { uint32_t drawId; @@ -302,3 +306,18 @@ event ClipperEvent uint32_t trivialAcceptCount; uint32_t mustClipCount; }; + +event CullEvent +{ + uint32_t drawId; + uint64_t backfacePrimCount; + uint64_t degeneratePrimCount; +}; + +event AlphaEvent +{ + uint32_t drawId; + uint32_t alphaTestCount; + uint32_t alphaBlendCount; +}; + diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index f0a9310..37593be 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -120,6 +120,21 @@ event ClipInfoEvent uint32_t clipMask; }; +event CullInfoEvent +{ + uint32_t drawId; + uint64_t degeneratePrimMask; + uint64_t backfacePrimMask; + uint32_t validMask; +}; + +event AlphaInfoEvent +{ + uint32_t drawId; + uint32_t alphaTestEnable; + uint32_t alphaBlendEnable; +}; + event DrawInstancedEvent { uint32_t drawId; diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h index 8c539e3..dd349a1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h +++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h @@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P } // Merge Output to 4x2 SIMD Tile Format -INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const 
&depthPassMask, uint32_t renderTargetMask) +INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW } } + // Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW #if USE_8x2_TILE_BACKEND // Merge Output to 8x2 SIMD16 Tile Format -INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, - const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset) +INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState, + const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId) { // type safety guaranteed from template instantiation in BEChooser<>::GetFunc uint32_t rasterTileColorOffset = RasterTileColorOffset(sample); @@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW } } + // 
Track alpha events + AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended)); + // final write mask simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask)); @@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t // broadcast the results of the PS to all passing pixels #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else // USE_8x2_TILE_BACKEND - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId); #endif // USE_8x2_TILE_BACKEND if(!state.psState.forceEarlyZ && !T::bForcedSampleCount) diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp index 16418f7..4982025 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp @@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_ // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, 
vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp index 4cc1ed5..452fba1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp @@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3 // output merger RDTSC_BEGIN(BEOutputMerger, pDC->drawId); #if USE_8x2_TILE_BACKEND - OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset); + OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId); #else - OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask); + OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId); #endif // do final depth write after all pixel kills diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp index c9a37cb..d31fd37 100644 --- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp +++ 
b/src/gallium/drivers/swr/rasterizer/core/binner.cpp @@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl( RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0); } + AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask)); + /// Note: these variable initializations must stay above any 'goto endBenTriangles' // compute per tri backface uint32_t frontFaceMask = frontWindingTris; diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h index 8c26ec6..22acbe0 100644 --- a/src/gallium/drivers/swr/rasterizer/core/state.h +++ b/src/gallium/drivers/swr/rasterizer/core/state.h @@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT simdvector* result; simdscalari* oMask; simdscalari* pMask; + uint32_t isAlphaTested; + uint32_t isAlphaBlended; }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp index 6b7efbf..912a88f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp @@ -557,6 +557,8 @@ struct BlendJit : public Builder ppoMask->setName("ppoMask"); Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask }); ppMask->setName("pMask"); + Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + AlphaTest1->setName("AlphaTest1"); static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format"); Value* dst[4]; @@ -590,12 +592,22 @@ struct BlendJit : public Builder // alpha test if (state.desc.alphaTestEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested }); + } // color blend if 
(state.blendState.blendEnable) { + // Gather for archrast stats + STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + // clamp sources Clamp(state.format, src); Clamp(state.format, src1); @@ -647,6 +659,11 @@ struct BlendJit : public Builder STORE(result[i], pResult, { 0, i }); } } + else + { + // Gather for archrast stats + STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended }); + } if(state.blendState.logicOpEnable) {