From 1411796f200de86d037578752450a9853359670c Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 27 Jun 2022 17:20:05 +0300 Subject: [PATCH] gpu: host1x: Rewrite job opcode sequence For new (Tegra186+) SoCs, use a new ('full-featured') job opcode sequence that is compatible with virtualization. In particular, the Host1x hardware in Tegra234 is more strict regarding the sequence, requiring ACQUIRE_MLOCK-SETCLASS-SETSTREAMID opcodes to occur in that sequence without gaps (except for SETPAYLOAD), so let's do it properly in one go now. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/channel_hw.c | 144 ++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index f84caf0..4eb7fb2 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, u32 next_class) { -#if HOST1X_HW >= 2 + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + HOST1X_OPCODE_NOP + ); + host1x_cdma_push_wide(&job->channel->cdma, + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#elif HOST1X_HW >= 2 host1x_cdma_push_wide(cdma, host1x_opcode_setclass( HOST1X_CLASS_HOST1X, @@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) else threshold = cmd->wait.threshold; - submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); + submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class); } else { struct host1x_job_gather *g = &cmd->gather; @@ -180,42 +211,70 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch) #endif } -static void host1x_channel_program_engine_streamid(struct host1x_job *job) +static void channel_program_cdma(struct host1x_job *job) { + struct host1x_cdma *cdma = &job->channel->cdma; + struct host1x_syncpt *sp = job->syncpt; + #if HOST1X_HW >= 6 u32 fence; - if (!job->memory_context) - return; + /* Enter engine class with invalid stream ID. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(job->class), + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); - fence = host1x_syncpt_incr_max(job->syncpt, 1); + /* Before switching stream ID to real stream ID, ensure engine is idle. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); - /* First, increment a syncpoint on OP_DONE condition.. */ + /* Submit work. */ + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); + /* Before releasing MLOCK, ensure engine is idle again. */ + fence = host1x_syncpt_incr_max(sp, 1); host1x_cdma_push(&job->channel->cdma, host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | - HOST1X_UCLASS_INCR_SYNCPT_COND_F(1)); + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); - /* Wait for syncpoint to increment */ + /* Release MLOCK. */ + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); +#else + if (job->serialize) { + /* + * Force serialization by inserting a host wait for the + * previous job to finish before this one can commence. + */ + host1x_cdma_push(cdma, + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), 1), + host1x_class_host_wait_syncpt(job->syncpt->id, + host1x_syncpt_read_max(sp))); + } - host1x_cdma_push(&job->channel->cdma, - host1x_opcode_setclass(HOST1X_CLASS_HOST1X, - host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt->id, fence)); + /* Synchronize base register to allow using it for relative waiting */ + if (sp->base) + synchronize_syncpt_base(job); - /* - * Now that we know the engine is idle, return to class and - * change stream ID. - */ + /* add a setclass for modules that require it */ + if (job->class) + host1x_cdma_push(cdma, + host1x_opcode_setclass(job->class, 0, 0), + HOST1X_OPCODE_NOP); - host1x_cdma_push(&job->channel->cdma, - host1x_opcode_setclass(job->class, 0, 0), - HOST1X_OPCODE_NOP); + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); - host1x_cdma_push(&job->channel->cdma, - host1x_opcode_setpayload(job->memory_context->stream_id), - host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); #endif } @@ -223,7 +282,6 @@ static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; struct host1x_syncpt *sp = job->syncpt; - u32 user_syncpt_incrs = job->syncpt_incrs; u32 prev_max = 0; u32 syncval; int err; @@ -251,6 +309,7 @@ static int channel_submit(struct host1x_job *job) host1x_channel_set_streamid(ch); host1x_enable_gather_filter(ch); + host1x_hw_syncpt_assign_to_channel(host, sp, ch); /* begin a CDMA submit */ err = host1x_cdma_begin(&ch->cdma, job); @@ -259,40 +318,7 @@ static int channel_submit(struct host1x_job *job) goto error; } - if (job->serialize) { - /* - * Force serialization by inserting a host wait for the - * previous job to finish before this one can commence. - */ - host1x_cdma_push(&ch->cdma, - host1x_opcode_setclass(HOST1X_CLASS_HOST1X, - host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt->id, - host1x_syncpt_read_max(sp))); - } - - /* Synchronize base register to allow using it for relative waiting */ - if (sp->base) - synchronize_syncpt_base(job); - - host1x_hw_syncpt_assign_to_channel(host, sp, ch); - - /* add a setclass for modules that require it */ - if (job->class) - host1x_cdma_push(&ch->cdma, - host1x_opcode_setclass(job->class, 0, 0), - HOST1X_OPCODE_NOP); - - /* - * Ensure engine DMA is idle and set new stream ID. May increment - * syncpt max. - */ - host1x_channel_program_engine_streamid(job); - - syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); - job->syncpt_end = syncval; - - submit_gathers(job, syncval - user_syncpt_incrs); + channel_program_cdma(job); /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, job); -- 2.7.4