irq_execute_cb(wrk);
}
-static void __notify_execute_cb(struct i915_request *rq)
+static __always_inline void
+__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
struct execute_cb *cb, *cn;
- lockdep_assert_held(&rq->lock);
-
- GEM_BUG_ON(!i915_request_is_active(rq));
if (llist_empty(&rq->execute_cb))
return;
- llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
- irq_work_queue(&cb->work);
+ llist_for_each_entry_safe(cb, cn,
+ llist_del_all(&rq->execute_cb),
+ work.llnode)
+ fn(&cb->work);
+}
- /*
- * XXX Rollback on __i915_request_unsubmit()
- *
- * In the future, perhaps when we have an active time-slicing scheduler,
- * it will be interesting to unsubmit parallel execution and remove
- * busywaits from the GPU until their master is restarted. This is
- * quite hairy, we have to carefully rollback the fence and do a
- * preempt-to-idle cycle on the target engine, all the while the
- * master execute_cb may refire.
- */
- init_llist_head(&rq->execute_cb);
+static void __notify_execute_cb_irq(struct i915_request *rq)
+{
+ __notify_execute_cb(rq, irq_work_queue);
+}
+
+static bool irq_work_imm(struct irq_work *wrk)
+{
+ wrk->func(wrk);
+ return false;
+}
+
+static void __notify_execute_cb_imm(struct i915_request *rq)
+{
+ __notify_execute_cb(rq, irq_work_imm);
}
static void free_capture_list(struct i915_request *request)
locked = engine;
}
list_del_init(&rq->sched.link);
+
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+
+ /* Prevent further __await_execution() registering a cb, then flush */
+ set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+
spin_unlock_irq(&locked->active.lock);
+
+ __notify_execute_cb_imm(rq);
}
bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
+ i915_request_mark_complete(rq);
/*
* We know the GPU must have read the request to have
__i915_request_fill(rq, POISON_FREE);
rq->ring->head = rq->postfix;
- /*
- * We only loosely track inflight requests across preemption,
- * and so we may find ourselves attempting to retire a _completed_
- * request that we have removed from the HW and put back on a run
- * queue.
- */
- remove_from_engine(rq);
-
- i915_request_mark_complete(rq);
if (!i915_request_signaled(rq)) {
spin_lock_irq(&rq->lock);
dma_fence_signal_locked(&rq->fence);
*/
remove_from_engine(rq);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
- spin_unlock_irq(&rq->lock);
__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
} while (i915_request_retire(tmp) && tmp != rq);
}
-static void __llist_add(struct llist_node *node, struct llist_head *head)
-{
- node->next = head->first;
- head->first = node;
-}
-
static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
cb->work.func = irq_execute_cb_hook;
}
- spin_lock_irq(&signal->lock);
- if (i915_request_is_active(signal) || __request_in_flight(signal)) {
- if (hook) {
- hook(rq, &signal->fence);
- i915_request_put(signal);
- }
- i915_sw_fence_complete(cb->fence);
- kmem_cache_free(global.slab_execute_cbs, cb);
- } else {
- __llist_add(&cb->work.llnode, &signal->execute_cb);
+ /*
+ * Register the callback first, then see if the signaler is already
+ * active. This ensures that if we race with the
+ * __notify_execute_cb from i915_request_submit() and we are not
+ * included in that list, we get a second bite of the cherry and
+ * execute it ourselves. After this point, a future
+ * i915_request_submit() will notify us.
+ *
+ * In i915_request_retire() we set the ACTIVE bit on a completed
+ * request (then flush the execute_cb). So by registering the
+ * callback first, then checking the ACTIVE bit, we serialise with
+ * the completed/retired request.
+ */
+ if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
+ if (i915_request_is_active(signal) ||
+ __request_in_flight(signal))
+ __notify_execute_cb_imm(signal);
}
- spin_unlock_irq(&signal->lock);
return 0;
}
* preempt-to-idle cycle on the target engine, all the while the
* master execute_cb may refire.
*/
- __notify_execute_cb(request);
+ __notify_execute_cb_irq(request);
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
- i915_request_enable_breadcrumb(request);
+ /* We may be recursing from the signal callback of another i915 fence */
+ if (!i915_request_signaled(request)) {
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &request->fence.flags) &&
+ !i915_request_enable_breadcrumb(request))
+ intel_engine_signal_breadcrumbs(engine);
+
+ spin_unlock(&request->lock);
+ }
return result;
}