From 2eace10c62914708c0d59b3a6151da9e1a3a817c Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Mon, 9 Sep 2019 12:23:58 +0200 Subject: [PATCH] v3d: fix TF primitive counts for resume without draw The V3D documentation states that primitive counters are reset when we emit Tile Binning Mode Configuration items, which we do at the start of each draw call, however, in the actual hardware this doesn't seem to take effect when transform feedback is not active (this doesn't happen in the simulator). This causes a problem in the following scenario: glBeginTransformFeedback() glDrawArrays() glPauseTransformFeedback() glDrawArrays() glResumeTransformFeedback() glEndTransformFeedback() The TF pause will trigger a flush of the primitive counters, which results in a correct number of primitives up to that point. In theory, the counter should then be reset when we execute the draw after pausing TF, but that doesn't happen, and since TF is enabled again by the resume command before we end recording, by the time we end the transform feedback recording we again check the counters, but instead of reading 0, we read again the same value we read at the time we paused, incorrectly accumulating that value again. In theory, we should be able to avoid this by using the other method to reset the primitive counters: using operation 1 instead of 0 when we flush the counts to the buffer at the time we pause, but again, this doesn't seem to be work and we still see obsolete counts by the time we end transform feedback. This patch fixes the problem by not accumulating TF primitive counts unless we know we have actually queued draw calls during transform feedback, since that seems to effectively reset the counters. This should also be more performant, since it saves unnecessary stalls for the primitive counters to be updated when we know there haven't been any new primitives drawn. Fixes CTS tests: dEQP-GLES3.functional.transform_feedback.* Reviewed-by: Eric Anholt --- src/gallium/drivers/v3d/v3d_context.h | 6 ++++++ src/gallium/drivers/v3d/v3d_job.c | 10 +++++++++- src/gallium/drivers/v3d/v3dx_draw.c | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 4ff17da..9022ac3 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -413,6 +413,12 @@ struct v3d_job { */ uint32_t draw_calls_queued; + /** + * Number of draw calls (not counting full buffer clears) queued in + * the current job during active transform feedback. + */ + uint32_t tf_draw_calls_queued; + struct v3d_job_key key; }; diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c index d16896c..db2ce0a 100644 --- a/src/gallium/drivers/v3d/v3d_job.c +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -526,8 +526,16 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job) * feedback we need to read the primitive counts and accumulate * them, otherwise they will be reset at the start of the next * draw when we emit the Tile Binning Mode Configuration packet. + * + * If the job doesn't have any TF draw calls, then we know + * the primitive count must be zero and we can skip stalling + * for this. This also fixes a problem because it seems that + * in this scenario the counters are not reset with the Tile + * Binning Mode Configuration packet, which would translate + * to us reading an obsolete (possibly non-zero) value from + * the GPU counters. */ - if (v3d->streamout.num_targets) + if (v3d->streamout.num_targets && job->tf_draw_calls_queued > 0) v3d_read_and_accumulate_primitive_counters(v3d); } diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 2eed8f1..efc8d24 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -897,6 +897,8 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_emit(&job->bcl, TRANSFORM_FEEDBACK_FLUSH_AND_COUNT, flush); job->draw_calls_queued++; + if (v3d->streamout.num_targets) + job->tf_draw_calls_queued++; /* Increment the TF offsets by how many verts we wrote. XXX: This * needs some clamping to the buffer size. -- 2.7.4