}
}
-static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
- return true;
+ return;
if (!intel_gt_pm_get_if_awake(engine->gt))
- return false;
+ return;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
if (!b->irq_enabled++)
irq_enable(engine);
-
- return true;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
}
-bool i915_request_enable_breadcrumb(struct i915_request *rq)
+static void insert_breadcrumb(struct i915_request *rq,
+ struct intel_breadcrumbs *b)
{
- lockdep_assert_held(&rq->lock);
+ struct intel_context *ce = rq->context;
+ struct list_head *pos;
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
- return true;
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
+ return;
- if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
- struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->context;
- struct list_head *pos;
+ __intel_breadcrumbs_arm_irq(b);
- spin_lock(&b->irq_lock);
+ /*
+ * We keep the seqno in retirement order, so we can break
+ * inside intel_engine_signal_breadcrumbs as soon as we've
+ * passed the last completed request (or seen a request that
+ * hasn't even started). We could walk the timeline->requests,
+ * but keeping a separate signalers_list has the advantage of
+ * hopefully being much smaller than the full list and so
+ * provides faster iteration and detection when there are no
+ * more interrupts required for this context.
+ *
+ * We typically expect to add new signalers in order, so we
+ * start looking for our insertion point from the tail of
+ * the list.
+ */
+ list_for_each_prev(pos, &ce->signals) {
+ struct i915_request *it =
+ list_entry(pos, typeof(*it), signal_link);
- if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
- goto unlock;
+ if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
+ break;
+ }
+ list_add(&rq->signal_link, pos);
+ if (pos == &ce->signals) /* catch transitions from empty list */
+ list_move_tail(&ce->signal_link, &b->signalers);
+ GEM_BUG_ON(!check_signal_order(ce, rq));
- if (!__intel_breadcrumbs_arm_irq(b))
- goto unlock;
+ set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+}
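
For readers skimming the diff, the tail-first ordered insert that the comment in insert_breadcrumb() describes reduces to a few lines of ordinary C. The sketch below is illustrative only and not part of the patch: it uses the userspace <sys/queue.h> tail-queue macros instead of the kernel's list_head, and struct req, seqno_passed() and the sample seqnos are invented names.

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct req {
	unsigned int seqno;
	TAILQ_ENTRY(req) link;
};
TAILQ_HEAD(req_list, req);

/* Wrap-safe "a has passed (or equals) b", same idea as i915_seqno_passed(). */
static int seqno_passed(unsigned int a, unsigned int b)
{
	return (int)(a - b) >= 0;
}

/* Keep the list sorted by seqno; new requests usually belong at the tail,
 * so search for the insertion point from the back of the list. */
static void insert_ordered(struct req_list *list, struct req *rq)
{
	struct req *it;

	TAILQ_FOREACH_REVERSE(it, list, req_list, link) {
		if (seqno_passed(rq->seqno, it->seqno)) {
			TAILQ_INSERT_AFTER(list, it, rq, link);
			return;
		}
	}
	TAILQ_INSERT_HEAD(list, rq, link);	/* empty list, or rq is the oldest */
}

int main(void)
{
	struct req_list list = TAILQ_HEAD_INITIALIZER(list);
	unsigned int seqnos[] = { 1, 2, 4, 3 };	/* 3 arrives out of order */
	struct req *it;

	for (unsigned int i = 0; i < 4; i++) {
		struct req *rq = malloc(sizeof(*rq));
		rq->seqno = seqnos[i];
		insert_ordered(&list, rq);
	}

	TAILQ_FOREACH(it, &list, link)
		printf("%u ", it->seqno);	/* prints: 1 2 3 4 */
	printf("\n");
	return 0;
}
```

Walking from the tail means an in-order stream of requests finds its insertion point on the first comparison, which is the common case the comment is optimising for.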
- /*
- * We keep the seqno in retirement order, so we can break
- * inside intel_engine_signal_breadcrumbs as soon as we've
- * passed the last completed request (or seen a request that
- * hasn't event started). We could walk the timeline->requests,
- * but keeping a separate signalers_list has the advantage of
- * hopefully being much smaller than the full list and so
- * provides faster iteration and detection when there are no
- * more interrupts required for this context.
- *
- * We typically expect to add new signalers in order, so we
- * start looking for our insertion point from the tail of
- * the list.
- */
- list_for_each_prev(pos, &ce->signals) {
- struct i915_request *it =
- list_entry(pos, typeof(*it), signal_link);
+bool i915_request_enable_breadcrumb(struct i915_request *rq)
+{
+ struct intel_breadcrumbs *b;
- if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
- break;
- }
- list_add(&rq->signal_link, pos);
- if (pos == &ce->signals) /* catch transitions from empty list */
- list_move_tail(&ce->signal_link, &b->signalers);
- GEM_BUG_ON(!check_signal_order(ce, rq));
+ /* Serialises with i915_request_retire() using rq->lock */
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+ return true;
- set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-unlock:
+ /*
+ * Peek at i915_request_submit()/i915_request_unsubmit() status.
+ *
+ * If the request is not yet active (and not signaled), we will
+ * attach the breadcrumb later.
+ */
+ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ return true;
+
+ /*
+ * rq->engine is locked by rq->engine->active.lock. That however
+ * is not known until after rq->engine has been dereferenced and
+ * the lock acquired. Hence we acquire the lock and then validate
+ * that rq->engine still matches the lock we hold for it.
+ *
+ * Here, we are using the breadcrumb lock as a proxy for the
+ * rq->engine->active.lock, and we know that since the breadcrumb
+ * will be serialised within i915_request_submit/i915_request_unsubmit,
+ * the engine cannot change while active as long as we hold the
+ * breadcrumb lock on that engine.
+ *
+ * From the dma_fence_enable_signaling() path, we are outside of the
+ * request submit/unsubmit path, and so we must be more careful to
+ * acquire the right lock.
+ */
+ b = &READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
+ while (unlikely(b != &READ_ONCE(rq->engine)->breadcrumbs)) {
spin_unlock(&b->irq_lock);
+ b = &READ_ONCE(rq->engine)->breadcrumbs;
+ spin_lock(&b->irq_lock);
}
+ /*
+ * Now that we are finally serialised with request submit/unsubmit,
+ * [with b->irq_lock] and with i915_request_retire() [via checking
+ * SIGNALED with rq->lock] confirm the request is indeed active. If
+ * it is no longer active, the breadcrumb will be attached upon
+ * i915_request_submit().
+ */
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ insert_breadcrumb(rq, b);
+
+ spin_unlock(&b->irq_lock);
+
return !__request_completed(rq);
}
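
The engine-revalidation loop above is an instance of a general "lock, then re-check the pointer you locked through" pattern. Below is a minimal userspace sketch of that pattern, assuming (as the patch does) that the migrating side only re-points rq->engine while holding the old engine's lock; the struct names and pthread locks are illustrative, not the i915 types.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct engine {
	pthread_mutex_t lock;		/* stands in for the per-engine irq_lock */
	const char *name;
};

struct request {
	_Atomic(struct engine *) engine;	/* may be re-pointed while we race */
};

/*
 * Lock whichever engine the request is on *right now*.  The invariant this
 * relies on is that the request only migrates while the old engine's lock
 * is held, so once we hold the lock of the engine we read, the pointer can
 * no longer change under us.
 */
static struct engine *lock_current_engine(struct request *rq)
{
	struct engine *e = atomic_load(&rq->engine);

	pthread_mutex_lock(&e->lock);
	while (e != atomic_load(&rq->engine)) {
		/* The request migrated before we got the lock: retry. */
		pthread_mutex_unlock(&e->lock);
		e = atomic_load(&rq->engine);
		pthread_mutex_lock(&e->lock);
	}
	return e;
}

int main(void)
{
	struct engine e0 = { PTHREAD_MUTEX_INITIALIZER, "engine0" };
	struct request rq = { &e0 };
	struct engine *locked = lock_current_engine(&rq);

	printf("locked %s\n", locked->name);
	pthread_mutex_unlock(&locked->lock);
	return 0;
}
```

As in the kernel version, once the loop exits the caller knows the request cannot migrate away from the locked engine, so the subsequent I915_FENCE_FLAG_ACTIVE re-check and insert_breadcrumb() are race-free.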
{
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- lockdep_assert_held(&rq->lock);
-
/*
* We must wait for b->irq_lock so that we know the interrupt handler
* has released its reference to the intel_context and has completed
dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
i915_request_cancel_breadcrumb(rq);
+ spin_unlock_irq(&rq->lock);
+
if (i915_request_has_waitboost(rq)) {
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
atomic_dec(&rq->engine->gt->rps.num_waiters);
}
- if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
- set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
- __notify_execute_cb(rq);
- }
+
+ /*
+ * We only loosely track inflight requests across preemption,
+ * and so we may find ourselves attempting to retire a _completed_
+ * request that we have removed from the HW and put back on a run
+ * queue.
+ *
+ * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
+ * after removing the breadcrumb and signaling it, so that we do not
+ * inadvertently attach the breadcrumb to a completed request.
+ */
+ remove_from_engine(rq);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
-	spin_unlock_irq(&rq->lock);
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
}
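
The retire-side ordering the new comment spells out, signal the fence and strip its breadcrumb under rq->lock and only then do the rest outside the lock, can be illustrated with a plain mutex. This is a hedged sketch of the idea only; struct fence and the helpers below are made up and far simpler than the real dma_fence machinery.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fence {
	pthread_mutex_t lock;		/* stands in for rq->lock */
	bool signaled;
	bool breadcrumb_attached;
};

/* Retire: signal and strip the breadcrumb under the lock, then drop it.
 * Nothing that runs after the unlock may attach a breadcrumb. */
static void retire(struct fence *f)
{
	pthread_mutex_lock(&f->lock);
	f->signaled = true;		/* dma_fence_signal_locked() */
	f->breadcrumb_attached = false;	/* i915_request_cancel_breadcrumb() */
	pthread_mutex_unlock(&f->lock);

	/* waitboost accounting, remove_from_engine(), ... happen here,
	 * outside the lock, on an already-signalled fence. */
}

/* enable_signaling: completed fences never (re-)attach a breadcrumb. */
static bool enable_signaling(struct fence *f)
{
	bool attached = false;

	pthread_mutex_lock(&f->lock);
	if (!f->signaled) {
		f->breadcrumb_attached = true;
		attached = true;
	}
	pthread_mutex_unlock(&f->lock);
	return attached;
}

int main(void)
{
	struct fence f = { PTHREAD_MUTEX_INITIALIZER, false, false };

	printf("before retire: attached=%d\n", enable_signaling(&f));	/* 1 */
	retire(&f);
	printf("after retire:  attached=%d\n", enable_signaling(&f));	/* 0 */
	return 0;
}
```

Because enable_signaling() checks the signaled flag under the same lock retire() used to set it, a request can never end up both signalled and carrying a freshly attached breadcrumb.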
- /* We may be recursing from the signal callback of another i915 fence */
- if (!i915_request_signaled(request)) {
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
- __notify_execute_cb(request);
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
- &request->fence.flags) &&
- !i915_request_enable_breadcrumb(request))
- intel_engine_signal_breadcrumbs(engine);
+ /*
+ * XXX Rollback bonded-execution on __i915_request_unsubmit()?
+ *
+ * In the future, perhaps when we have an active time-slicing scheduler,
+ * it will be interesting to unsubmit parallel execution and remove
+ * busywaits from the GPU until their master is restarted. This is
+ * quite hairy: we have to carefully roll back the fence and do a
+ * preempt-to-idle cycle on the target engine, all the while the
+ * master execute_cb may refire.
+ */
+ __notify_execute_cb(request);
- spin_unlock(&request->lock);
- GEM_BUG_ON(!llist_empty(&request->execute_cb));
- }
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+ !i915_request_enable_breadcrumb(request))
+ intel_engine_signal_breadcrumbs(engine);
return result;
}
{
struct intel_engine_cs *engine = request->engine;
+ /*
+ * Only unwind in reverse order, required so that the per-context list
+ * is kept in seqno/ring order.
+ */
RQ_TRACE(request, "\n");
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
/*
- * Only unwind in reverse order, required so that the per-context list
- * is kept in seqno/ring order.
+ * Before we remove this breadcrumb from the signal list, we have
+ * to ensure that a concurrent dma_fence_enable_signaling() does not
+ * attach itself. We first mark the request as no longer active and
+ * make sure that is visible to other cores, and then remove the
+ * breadcrumb if attached.
*/
-
- /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
-
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+ clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);
- GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
- clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
-
- spin_unlock(&request->lock);
-
/* We've already spun, don't charge on resubmitting. */
if (request->sched.semaphores && i915_request_started(request))
request->sched.semaphores = 0;
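
Finally, the unsubmit path relies on publishing "no longer active" (clear_bit_unlock, i.e. a release) before cancelling any breadcrumb, while the enable-signaling path tests the flag only after taking the breadcrumb lock. A small userspace sketch of that handshake follows, with illustrative names and a pthread mutex standing in for b->irq_lock; it is not the i915 implementation.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;	/* b->irq_lock */
static atomic_bool active = true;	/* I915_FENCE_FLAG_ACTIVE stand-in */
static bool breadcrumb_attached;	/* protected by irq_lock */

/* enable-signaling side: only attach while the request is still active. */
static void enable_signaling(void)
{
	pthread_mutex_lock(&irq_lock);
	if (atomic_load_explicit(&active, memory_order_acquire))
		breadcrumb_attached = true;
	pthread_mutex_unlock(&irq_lock);
}

/* unsubmit side: publish "not active" first (the clear_bit_unlock step),
 * then remove any breadcrumb that raced in before the clear was visible. */
static void unsubmit(void)
{
	atomic_store_explicit(&active, false, memory_order_release);

	pthread_mutex_lock(&irq_lock);
	breadcrumb_attached = false;	/* i915_request_cancel_breadcrumb() */
	pthread_mutex_unlock(&irq_lock);
}

int main(void)
{
	enable_signaling();		/* attaches */
	unsubmit();
	enable_signaling();		/* no-op: the request is no longer active */
	printf("attached=%d\n", breadcrumb_attached);	/* 0 */
	return 0;
}
```

Either the enable side sees the cleared flag and skips the attach, or it attached first and the subsequent cancel under the same lock removes the breadcrumb again; in both interleavings the unsubmitted request ends up without a breadcrumb.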