From 9d86a5eea79ac30bb90af363c66a5ba8529b37d8 Mon Sep 17 00:00:00 2001 From: Bruce Cherniak Date: Thu, 28 Apr 2016 12:13:15 -0500 Subject: [PATCH] swr: Remove stall waiting for core query counters. When gathering query results, swr_gather_stats was unnecessarily stalling the entire pipeline. Results are now collected asynchronously, with a fence marking completion. Reviewed-By: George Kyriazis --- src/gallium/drivers/swr/swr_fence.cpp | 6 -- src/gallium/drivers/swr/swr_fence.h | 8 ++ src/gallium/drivers/swr/swr_query.cpp | 180 ++++++++++++---------------------- src/gallium/drivers/swr/swr_query.h | 11 ++- 4 files changed, 81 insertions(+), 124 deletions(-) diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp index 2e95b39..8a8e864 100644 --- a/src/gallium/drivers/swr/swr_fence.cpp +++ b/src/gallium/drivers/swr/swr_fence.cpp @@ -105,12 +105,6 @@ swr_fence_reference(struct pipe_screen *screen, swr_fence_destroy(old); } -static INLINE boolean -swr_is_fence_done(struct pipe_fence_handle *fence_handle) -{ - struct swr_fence *fence = swr_fence(fence_handle); - return (fence->read == fence->write); -} /* * Wait for the fence to finish. 
diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h index df3776e..47f4d2e 100644 --- a/src/gallium/drivers/swr/swr_fence.h +++ b/src/gallium/drivers/swr/swr_fence.h @@ -45,6 +45,14 @@ swr_fence(struct pipe_fence_handle *fence) return (struct swr_fence *)fence; } + +static INLINE boolean +swr_is_fence_done(struct pipe_fence_handle *fence_handle) +{ + struct swr_fence *fence = swr_fence(fence_handle); + return (fence->read == fence->write); +} + static INLINE boolean swr_is_fence_pending(struct pipe_fence_handle *fence_handle) { diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp index f038a6e..5c59965 100644 --- a/src/gallium/drivers/swr/swr_query.cpp +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -62,10 +62,8 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q) struct swr_query *pq = swr_query(q); if (pq->fence) { - if (!swr_is_fence_pending(pq->fence)) { - swr_fence_submit(swr_context(pipe), pq->fence); + if (swr_is_fence_pending(pq->fence)) swr_fence_finish(pipe->screen, pq->fence, 0); - } swr_fence_reference(pipe->screen, &pq->fence, NULL); } @@ -73,100 +71,45 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q) } -// XXX Create a fence callback, rather than stalling SwrWaitForIdle static void swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) { struct swr_context *ctx = swr_context(pipe); assert(pq->result); - union pipe_query_result *result = pq->result; + struct swr_query_result *result = pq->result; boolean enable_stats = pq->enable_stats; - SWR_STATS swr_stats = {0}; - - if (pq->fence) { - if (!swr_is_fence_pending(pq->fence)) { - swr_fence_submit(ctx, pq->fence); - swr_fence_finish(pipe->screen, pq->fence, 0); - } - swr_fence_reference(pipe->screen, &pq->fence, NULL); - } - /* - * These queries don't need SWR Stats enabled in the core - * Set and return. 
- */ + /* A few results don't require the core, so don't involve it */ switch (pq->type) { case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: - result->u64 = swr_get_timestamp(pipe->screen); - return; + result->timestamp = swr_get_timestamp(pipe->screen); break; case PIPE_QUERY_TIMESTAMP_DISJOINT: - /* nothing to do here */ - return; - break; case PIPE_QUERY_GPU_FINISHED: - result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId - vs LastRetiredId? */ - return; + /* nothing to do here */ break; default: - /* Any query that needs SwrCore stats */ - break; - } - - /* - * All other results are collected from SwrCore counters - */ + /* + * All other results are collected from SwrCore counters via + * SwrGetStats. This returns immediately, but results are later filled + * in by the backend. Fence status is the only indication of + * completion. */ + SwrGetStats(ctx->swrContext, &result->core); + + if (!pq->fence) { + struct swr_screen *screen = swr_screen(pipe->screen); + swr_fence_reference(pipe->screen, &pq->fence, screen->flush_fence); + } + swr_fence_submit(ctx, pq->fence); - /* XXX, Should turn this into a fence callback and skip the stall */ - SwrGetStats(ctx->swrContext, &swr_stats); - /* SwrGetStats returns immediately, wait for collection */ - SwrWaitForIdle(ctx->swrContext); + /* Only change stat collection if there are no active queries */ + if (ctx->active_queries == 0) + SwrEnableStats(ctx->swrContext, enable_stats); - switch (pq->type) { - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_OCCLUSION_COUNTER: - result->u64 = swr_stats.DepthPassCount; - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - result->u64 = swr_stats.IaPrimitives; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - result->u64 = swr_stats.SoNumPrimsWritten[pq->index]; - break; - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { - struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; - so_stats->num_primitives_written = 
- swr_stats.SoNumPrimsWritten[pq->index]; - so_stats->primitives_storage_needed = - swr_stats.SoPrimStorageNeeded[pq->index]; - } break; - case PIPE_QUERY_PIPELINE_STATISTICS: { - struct pipe_query_data_pipeline_statistics *p_stats = - &result->pipeline_statistics; - p_stats->ia_vertices = swr_stats.IaVertices; - p_stats->ia_primitives = swr_stats.IaPrimitives; - p_stats->vs_invocations = swr_stats.VsInvocations; - p_stats->gs_invocations = swr_stats.GsInvocations; - p_stats->gs_primitives = swr_stats.GsPrimitives; - p_stats->c_invocations = swr_stats.CPrimitives; - p_stats->c_primitives = swr_stats.CPrimitives; - p_stats->ps_invocations = swr_stats.PsInvocations; - p_stats->hs_invocations = swr_stats.HsInvocations; - p_stats->ds_invocations = swr_stats.DsInvocations; - p_stats->cs_invocations = swr_stats.CsInvocations; - } break; - default: - assert(0 && "Unsupported query"); break; } - - /* Only change stat collection if there are no active queries */ - if (ctx->active_queries == 0) - SwrEnableStats(ctx->swrContext, enable_stats); } @@ -176,16 +119,16 @@ swr_get_query_result(struct pipe_context *pipe, boolean wait, union pipe_query_result *result) { - struct swr_context *ctx = swr_context(pipe); struct swr_query *pq = swr_query(q); + struct swr_query_result *start = &pq->start; + struct swr_query_result *end = &pq->end; + unsigned index = pq->index; if (pq->fence) { - if (!swr_is_fence_pending(pq->fence)) { - swr_fence_submit(ctx, pq->fence); - if (!wait) - return FALSE; - swr_fence_finish(pipe->screen, pq->fence, 0); - } + if (!wait && !swr_is_fence_done(pq->fence)) + return FALSE; + + swr_fence_finish(pipe->screen, pq->fence, 0); swr_fence_reference(pipe->screen, &pq->fence, NULL); } @@ -194,62 +137,68 @@ swr_get_query_result(struct pipe_context *pipe, switch (pq->type) { /* Booleans */ case PIPE_QUERY_OCCLUSION_PREDICATE: - result->b = pq->end.u64 != pq->start.u64 ? 
TRUE : FALSE; + result->b = end->core.DepthPassCount != start->core.DepthPassCount; break; case PIPE_QUERY_GPU_FINISHED: - result->b = pq->end.b; + result->b = TRUE; break; /* Counters */ case PIPE_QUERY_OCCLUSION_COUNTER: + result->u64 = end->core.DepthPassCount - start->core.DepthPassCount; + break; case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIME_ELAPSED: + result->u64 = end->timestamp - start->timestamp; + break; case PIPE_QUERY_PRIMITIVES_GENERATED: + result->u64 = end->core.IaPrimitives - start->core.IaPrimitives; + break; case PIPE_QUERY_PRIMITIVES_EMITTED: - result->u64 = pq->end.u64 - pq->start.u64; + result->u64 = end->core.SoNumPrimsWritten[index] + - start->core.SoNumPrimsWritten[index]; break; /* Structures */ case PIPE_QUERY_SO_STATISTICS: { struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; + struct SWR_STATS *start = &pq->start.core; + struct SWR_STATS *end = &pq->end.core; so_stats->num_primitives_written = - end->num_primitives_written - start->num_primitives_written; + end->SoNumPrimsWritten[index] - start->SoNumPrimsWritten[index]; so_stats->primitives_storage_needed = - end->primitives_storage_needed - start->primitives_storage_needed; + end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index]; } break; - case PIPE_QUERY_TIMESTAMP_DISJOINT: { + case PIPE_QUERY_TIMESTAMP_DISJOINT: /* os_get_time_nano returns nanoseconds */ result->timestamp_disjoint.frequency = UINT64_C(1000000000); result->timestamp_disjoint.disjoint = FALSE; - } break; + break; case PIPE_QUERY_PIPELINE_STATISTICS: { struct pipe_query_data_pipeline_statistics *p_stats = &result->pipeline_statistics; - struct pipe_query_data_pipeline_statistics *start = - &pq->start.pipeline_statistics; - struct pipe_query_data_pipeline_statistics *end = - &pq->end.pipeline_statistics; - p_stats->ia_vertices = end->ia_vertices - 
start->ia_vertices; - p_stats->ia_primitives = end->ia_primitives - start->ia_primitives; - p_stats->vs_invocations = end->vs_invocations - start->vs_invocations; - p_stats->gs_invocations = end->gs_invocations - start->gs_invocations; - p_stats->gs_primitives = end->gs_primitives - start->gs_primitives; - p_stats->c_invocations = end->c_invocations - start->c_invocations; - p_stats->c_primitives = end->c_primitives - start->c_primitives; - p_stats->ps_invocations = end->ps_invocations - start->ps_invocations; - p_stats->hs_invocations = end->hs_invocations - start->hs_invocations; - p_stats->ds_invocations = end->ds_invocations - start->ds_invocations; - p_stats->cs_invocations = end->cs_invocations - start->cs_invocations; - } break; + struct SWR_STATS *start = &pq->start.core; + struct SWR_STATS *end = &pq->end.core; + p_stats->ia_vertices = end->IaVertices - start->IaVertices; + p_stats->ia_primitives = end->IaPrimitives - start->IaPrimitives; + p_stats->vs_invocations = end->VsInvocations - start->VsInvocations; + p_stats->gs_invocations = end->GsInvocations - start->GsInvocations; + p_stats->gs_primitives = end->GsPrimitives - start->GsPrimitives; + p_stats->c_invocations = end->CPrimitives - start->CPrimitives; + p_stats->c_primitives = end->CPrimitives - start->CPrimitives; + p_stats->ps_invocations = end->PsInvocations - start->PsInvocations; + p_stats->hs_invocations = end->HsInvocations - start->HsInvocations; + p_stats->ds_invocations = end->DsInvocations - start->DsInvocations; + p_stats->cs_invocations = end->CsInvocations - start->CsInvocations; + } break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { - struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; - struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; + struct SWR_STATS *start = &pq->start.core; + struct SWR_STATS *end = &pq->end.core; uint64_t num_primitives_written = - end->num_primitives_written - start->num_primitives_written; + end->SoNumPrimsWritten[index] - 
start->SoNumPrimsWritten[index]; uint64_t primitives_storage_needed = - end->primitives_storage_needed - start->primitives_storage_needed; + end->SoPrimStorageNeeded[index] - start->SoPrimStorageNeeded[index]; result->b = num_primitives_written > primitives_storage_needed; - } break; + } + break; default: assert(0 && "Unsupported query"); break; @@ -264,6 +212,8 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q) struct swr_context *ctx = swr_context(pipe); struct swr_query *pq = swr_query(q); + assert(!pq->enable_stats && "swr_begin_query: Query is already active!"); + /* Initialize Results */ memset(&pq->start, 0, sizeof(pq->start)); memset(&pq->end, 0, sizeof(pq->end)); @@ -276,7 +226,7 @@ swr_begin_query(struct pipe_context *pipe, struct pipe_query *q) /* override start timestamp to 0 for TIMESTAMP query */ if (pq->type == PIPE_QUERY_TIMESTAMP) - pq->start.u64 = 0; + pq->start.timestamp = 0; return true; } diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h index 836d07b..0ab034d 100644 --- a/src/gallium/drivers/swr/swr_query.h +++ b/src/gallium/drivers/swr/swr_query.h @@ -27,13 +27,18 @@ #include +struct swr_query_result { + SWR_STATS core; + uint64_t timestamp; +}; + struct swr_query { unsigned type; /* PIPE_QUERY_* */ unsigned index; - union pipe_query_result *result; - union pipe_query_result start; - union pipe_query_result end; + struct swr_query_result *result; + struct swr_query_result start; + struct swr_query_result end; struct pipe_fence_handle *fence; -- 2.7.4