From: Tim Rowley Date: Fri, 5 Aug 2016 17:17:25 +0000 (-0600) Subject: swr: [rasterizer core] fundamentally change how stats work X-Git-Tag: upstream/17.1.0~7445 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6625fd08db0a24fad54d69ca1eb3935304b3a53e;p=platform%2Fupstream%2Fmesa.git swr: [rasterizer core] fundamentally change how stats work Add a per draw stats callback to update driver stats. Signed-off-by: Tim Rowley --- diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 3922606..5ebefab 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -143,6 +143,7 @@ HANDLE SwrCreateContext( pContext->pfnStoreTile = pCreateInfo->pfnStoreTile; pContext->pfnClearTile = pCreateInfo->pfnClearTile; pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset; + pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats; // pass pointer to bucket manager back to caller #ifdef KNOB_ENABLE_RDTSC @@ -1519,18 +1520,7 @@ void SwrGetStats( HANDLE hContext, SWR_STATS* pStats) { - SWR_CONTEXT *pContext = GetContext(hContext); - DRAW_CONTEXT* pDC = GetDrawContext(pContext); - - pDC->FeWork.type = QUERYSTATS; - pDC->FeWork.pfnWork = ProcessQueryStats; - pDC->FeWork.desc.queryStats.pStats = pStats; - - // cannot execute until all previous draws have completed - pDC->dependent = true; - - //enqueue - QueueDraw(pContext); + SWR_ASSERT(0); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h index d7621d5..9c80526 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.h +++ b/src/gallium/drivers/swr/rasterizer/core/api.h @@ -88,6 +88,13 @@ typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext, uint32_t soBufferSlot, uint32_t soWriteOffset); +////////////////////////////////////////////////////////////////////////// +/// @brief Callback to allow driver to update their copy of stats. +/// @param hPrivateContext - handle to private data +/// @param pStats - pointer to draw stats +typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext, + const SWR_STATS* pStats); + class BucketManager; ////////////////////////////////////////////////////////////////////////// @@ -118,6 +125,7 @@ struct SWR_CREATECONTEXT_INFO PFN_STORE_TILE pfnStoreTile; PFN_CLEAR_TILE pfnClearTile; PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; + PFN_UPDATE_STATS pfnUpdateStats; // Pointer to rdtsc buckets mgr returned to the caller. // Only populated when KNOB_ENABLE_RDTSC is set diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h index 47fea16..b38ec46 100644 --- a/src/gallium/drivers/swr/rasterizer/core/context.h +++ b/src/gallium/drivers/swr/rasterizer/core/context.h @@ -372,6 +372,8 @@ struct DRAW_DYNAMIC_STATE ///@todo Currently assumes only a single FE can do stream output for a draw. uint32_t SoWriteOffset[4]; bool SoWriteOffsetDirty[4]; + + SWR_STATS stats[KNOB_MAX_NUM_THREADS]; }; // Draw Context @@ -480,6 +482,7 @@ struct SWR_CONTEXT PFN_STORE_TILE pfnStoreTile; PFN_CLEAR_TILE pfnClearTile; PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset; + PFN_UPDATE_STATS pfnUpdateStats; // Global Stats SWR_STATS stats[KNOB_MAX_NUM_THREADS]; @@ -496,4 +499,4 @@ struct SWR_CONTEXT void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId); void WakeAllThreads(SWR_CONTEXT *pContext); -#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pContext->stats[workerId].name += count; } +#define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; } diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp index 434c8cb..fb17af1 100644 --- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp @@ -306,8 +306,48 @@ bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastReti return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1); } +////////////////////////////////////////////////////////////////////////// +/// @brief Update client stats. +INLINE void UpdateClientStats(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) +{ + if ((pContext->pfnUpdateStats == nullptr) || (GetApiState(pDC).enableStats == false)) + { + return; + } + + DRAW_DYNAMIC_STATE& dynState = pDC->dynState; + SWR_STATS stats{ 0 }; + + // Sum up stats across all workers before sending to client. + for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) + { + stats.DepthPassCount += dynState.stats[i].DepthPassCount; + stats.IaVertices += dynState.stats[i].IaVertices; + stats.IaPrimitives += dynState.stats[i].IaPrimitives; + stats.VsInvocations += dynState.stats[i].VsInvocations; + stats.HsInvocations += dynState.stats[i].HsInvocations; + stats.DsInvocations += dynState.stats[i].DsInvocations; + stats.GsInvocations += dynState.stats[i].GsInvocations; + stats.PsInvocations += dynState.stats[i].PsInvocations; + stats.CInvocations += dynState.stats[i].CInvocations; + stats.CsInvocations += dynState.stats[i].CsInvocations; + stats.CPrimitives += dynState.stats[i].CPrimitives; + stats.GsPrimitives += dynState.stats[i].GsPrimitives; + + for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) + { + stats.SoPrimStorageNeeded[stream] += dynState.stats[i].SoPrimStorageNeeded[stream]; + stats.SoNumPrimsWritten[stream] += dynState.stats[i].SoNumPrimsWritten[stream]; + } + } + + pContext->pfnUpdateStats(GetPrivateState(pDC), &stats); +} + INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC) { + UpdateClientStats(pContext, pDC); + if (pDC->retireCallback.pfnCallbackFunc) { pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData, diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index c8d5cd6..53d2b93 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -344,6 +344,36 @@ swr_render_condition(struct pipe_context *pipe, ctx->render_cond_cond = condition; } +static void +swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats) +{ + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + + if (!pDC) + return; + + struct swr_context *ctx = (struct swr_context *)pDC->swr_ctx; + + SWR_STATS *pSwrStats = &ctx->stats; + pSwrStats->DepthPassCount += pStats->DepthPassCount; + pSwrStats->IaVertices += pStats->IaVertices; + pSwrStats->IaPrimitives += pStats->IaPrimitives; + pSwrStats->VsInvocations += pStats->VsInvocations; + pSwrStats->HsInvocations += pStats->HsInvocations; + pSwrStats->DsInvocations += pStats->DsInvocations; + pSwrStats->GsInvocations += pStats->GsInvocations; + pSwrStats->PsInvocations += pStats->PsInvocations; + pSwrStats->CsInvocations += pStats->CsInvocations; + pSwrStats->CInvocations += pStats->CInvocations; + pSwrStats->CPrimitives += pStats->CPrimitives; + pSwrStats->GsPrimitives += pStats->GsPrimitives; + + for (unsigned i = 0; i < 4; i++) { + pSwrStats->SoPrimStorageNeeded[i] += pStats->SoPrimStorageNeeded[i]; + pSwrStats->SoNumPrimsWritten[i] += pStats->SoNumPrimsWritten[i]; + } +} + struct pipe_context * swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { @@ -358,6 +388,7 @@ swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) createInfo.pfnLoadTile = swr_LoadHotTile; createInfo.pfnStoreTile = swr_StoreHotTile; createInfo.pfnClearTile = swr_StoreHotTileClear; + createInfo.pfnUpdateStats = swr_UpdateStats; ctx->swrContext = SwrCreateContext(&createInfo); /* Init Load/Store/ClearTiles Tables */ diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 75ecae3..4133720 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -92,6 +92,7 @@ struct swr_draw_context { float userClipPlanes[PIPE_MAX_CLIP_PLANES][4]; SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS]; + void *swr_ctx; }; /* gen_llvm_types FINI */ @@ -157,6 +158,8 @@ struct swr_context { /* SWR private state - draw context */ struct swr_draw_context swrDC; + SWR_STATS stats; + unsigned dirty; /**< Mask of SWR_NEW_x flags */ }; @@ -171,6 +174,7 @@ swr_update_draw_context(struct swr_context *ctx) { swr_draw_context *pDC = (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + ctx->swrDC.swr_ctx = ctx; memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context)); } diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp index 5b8f059..35d0e53 100644 --- a/src/gallium/drivers/swr/swr_query.cpp +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -91,18 +91,17 @@ swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) /* nothing to do here */ break; default: - /* - * All other results are collected from SwrCore counters via - * SwrGetStats. This returns immediately, but results are later filled - * in by the backend. Fence status is the only indication of - * completion. */ - SwrGetStats(ctx->swrContext, &result->core); + /* TODO: should fence instead of stalling pipeline */ + SwrWaitForIdle(ctx->swrContext); + memcpy(&result->core, &ctx->stats, sizeof(result->core)); +#if 0 if (!pq->fence) { struct swr_screen *screen = swr_screen(pipe->screen); swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence); } swr_fence_submit(ctx, pq->fence); +#endif /* Only change stat collection if there are no active queries */ if (ctx->active_queries == 0)