drm/i915/gt: ce->inflight updates are now serialised
authorChris Wilson <chris@chris-wilson.co.uk>
Thu, 24 Dec 2020 13:55:44 +0000 (13:55 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Thu, 24 Dec 2020 15:02:41 +0000 (15:02 +0000)
Since schedule-in and schedule-out are now both always under the tasklet
bitlock, we can reduce the individual atomic operations to simple
instructions and worry less.

This notably eliminates the race observed with intel_context_inflight in
__engine_unpark().

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2583
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201224135544.1713-9-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gt/intel_execlists_submission.c

index dc1312d862edc7144ac5313d0f55af225c04e8e0..1fae6c6f38682cfb0b83533800b676f6c9cf229c 100644 (file)
@@ -524,11 +524,11 @@ __execlists_schedule_in(struct i915_request *rq)
                ce->lrc.ccid = ce->tag;
        } else {
                /* We don't need a strict matching tag, just different values */
-               unsigned int tag = ffs(READ_ONCE(engine->context_tag));
+               unsigned int tag = __ffs(engine->context_tag);
 
-               GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
-               clear_bit(tag - 1, &engine->context_tag);
-               ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+               GEM_BUG_ON(tag >= BITS_PER_LONG);
+               __clear_bit(tag, &engine->context_tag);
+               ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32);
 
                BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
        }
@@ -541,6 +541,8 @@ __execlists_schedule_in(struct i915_request *rq)
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
        intel_engine_context_in(engine);
 
+       CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid);
+
        return engine;
 }
 
@@ -552,13 +554,10 @@ static inline void execlists_schedule_in(struct i915_request *rq, int idx)
        GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
        trace_i915_request_in(rq, idx);
 
-       old = READ_ONCE(ce->inflight);
-       do {
-               if (!old) {
-                       WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
-                       break;
-               }
-       } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+       old = ce->inflight;
+       if (!old)
+               old = __execlists_schedule_in(rq);
+       WRITE_ONCE(ce->inflight, ptr_inc(old));
 
        GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 }
@@ -611,12 +610,11 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
                tasklet_hi_schedule(&ve->base.execlists.tasklet);
 }
 
-static inline void
-__execlists_schedule_out(struct i915_request *rq,
-                        struct intel_engine_cs * const engine,
-                        unsigned int ccid)
+static inline void __execlists_schedule_out(struct i915_request *rq)
 {
        struct intel_context * const ce = rq->context;
+       struct intel_engine_cs * const engine = rq->engine;
+       unsigned int ccid;
 
        /*
         * NB process_csb() is not under the engine->active.lock and hence
@@ -624,6 +622,8 @@ __execlists_schedule_out(struct i915_request *rq,
         * refrain from doing non-trivial work here.
         */
 
+       CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
+
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                lrc_check_regs(ce, engine, "after");
 
@@ -635,12 +635,13 @@ __execlists_schedule_out(struct i915_request *rq,
            __i915_request_is_complete(rq))
                intel_engine_add_retire(engine, ce->timeline);
 
+       ccid = ce->lrc.ccid;
        ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
        ccid &= GEN12_MAX_CONTEXT_HW_ID;
        if (ccid < BITS_PER_LONG) {
                GEM_BUG_ON(ccid == 0);
                GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
-               set_bit(ccid - 1, &engine->context_tag);
+               __set_bit(ccid - 1, &engine->context_tag);
        }
 
        lrc_update_runtime(ce);
@@ -661,26 +662,23 @@ __execlists_schedule_out(struct i915_request *rq,
         */
        if (ce->engine != engine)
                kick_siblings(rq, ce);
-
-       intel_context_put(ce);
 }
 
 static inline void
 execlists_schedule_out(struct i915_request *rq)
 {
        struct intel_context * const ce = rq->context;
-       struct intel_engine_cs *cur, *old;
-       u32 ccid;
 
        trace_i915_request_out(rq);
 
-       ccid = rq->context->lrc.ccid;
-       old = READ_ONCE(ce->inflight);
-       do
-               cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
-       while (!try_cmpxchg(&ce->inflight, &old, cur));
-       if (!cur)
-               __execlists_schedule_out(rq, old, ccid);
+       GEM_BUG_ON(!ce->inflight);
+       ce->inflight = ptr_dec(ce->inflight);
+       if (!__intel_context_inflight_count(ce->inflight)) {
+               GEM_BUG_ON(ce->inflight != rq->engine);
+               __execlists_schedule_out(rq);
+               WRITE_ONCE(ce->inflight, NULL);
+               intel_context_put(ce);
+       }
 
        i915_request_put(rq);
 }