lockdep_assert_held(&b->rb_lock);
GEM_BUG_ON(b->irq_wait == wait);
- /* This request is completed, so remove it from the tree, mark it as
+ /*
+ * This request is completed, so remove it from the tree, mark it as
* complete, and *then* wake up the associated task. N.B. when the
* task wakes up, it will find the empty rb_node, discern that it
* has already been removed from the tree and skip the serialisation
rb_erase(&wait->node, &b->waiters);
RB_CLEAR_NODE(&wait->node);
- wake_up_process(wait->tsk); /* implicit smp_wmb() */
+ if (wait->tsk->state != TASK_RUNNING)
+ wake_up_process(wait->tsk); /* implicit smp_wmb() */
}
static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
spin_unlock_irq(&b->rb_lock);
}
-static bool signal_complete(const struct i915_request *request)
-{
- if (!request)
- return false;
-
- /*
- * Carefully check if the request is complete, giving time for the
- * seqno to be visible or if the GPU hung.
- */
- return __i915_request_irq_complete(request);
-}
-
-static struct i915_request *to_signaler(struct rb_node *rb)
-{
- return rb_entry(rb, struct i915_request, signaling.node);
-}
-
static void signaler_set_rtpriority(void)
{
struct sched_param param = { .sched_priority = 1 };
sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
}
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
- struct i915_request *request)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- lockdep_assert_held(&b->rb_lock);
-
- /*
- * Wake up all other completed waiters and select the
- * next bottom-half for the next user interrupt.
- */
- __intel_engine_remove_wait(engine, &request->signaling.wait);
-
- /*
- * Find the next oldest signal. Note that as we have
- * not been holding the lock, another client may
- * have installed an even older signal than the one
- * we just completed - so double check we are still
- * the oldest before picking the next one.
- */
- if (request->signaling.wait.seqno) {
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb = rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
- }
-
- rb_erase(&request->signaling.node, &b->signals);
- request->signaling.wait.seqno = 0;
- }
-}
-
-static struct i915_request *
-get_first_signal_rcu(struct intel_breadcrumbs *b)
-{
- /*
- * See the big warnings for i915_gem_active_get_rcu() and similarly
- * for dma_fence_get_rcu_safe() that explain the intricacies involved
- * here with defeating CPU/compiler speculation and enforcing
- * the required memory barriers.
- */
- do {
- struct i915_request *request;
-
- request = rcu_dereference(b->first_signal);
- if (request)
- request = i915_request_get_rcu(request);
-
- barrier();
-
- if (!request || request == rcu_access_pointer(b->first_signal))
- return rcu_pointer_handoff(request);
-
- i915_request_put(request);
- } while (1);
-}
-
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct i915_request *request;
+ struct i915_request *rq, *n;
/* Install ourselves with high priority to reduce signalling latency */
signaler_set_rtpriority();
do {
bool do_schedule = true;
+ LIST_HEAD(list);
+ u32 seqno;
set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty(&b->signals))
+ goto sleep;
- /* We are either woken up by the interrupt bottom-half,
+ /*
+ * We are either woken up by the interrupt bottom-half,
* or by a client adding a new signaller. In both cases,
* the GPU seqno may have advanced beyond our oldest signal.
* If it has, propagate the signal, remove the waiter and
* need to wait for a new interrupt from the GPU or for
* a new client.
*/
- rcu_read_lock();
- request = get_first_signal_rcu(b);
- rcu_read_unlock();
- if (signal_complete(request)) {
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &request->fence.flags)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- GEM_BUG_ON(!i915_request_completed(request));
- local_bh_enable(); /* kick start the tasklets */
- }
+ seqno = intel_engine_get_seqno(engine);
+
+ spin_lock_irq(&b->rb_lock);
+ list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+ u32 this = rq->signaling.wait.seqno;
+
+ GEM_BUG_ON(!rq->signaling.wait.seqno);
- if (READ_ONCE(request->signaling.wait.seqno)) {
- spin_lock_irq(&b->rb_lock);
- __intel_engine_remove_signal(engine, request);
- spin_unlock_irq(&b->rb_lock);
+ if (!i915_seqno_passed(seqno, this))
+ break;
+
+ if (likely(this == i915_request_global_seqno(rq))) {
+ __intel_engine_remove_wait(engine,
+ &rq->signaling.wait);
+
+ rq->signaling.wait.seqno = 0;
+ __list_del_entry(&rq->signaling.link);
+
+ if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &rq->fence.flags)) {
+ list_add_tail(&rq->signaling.link,
+ &list);
+ i915_request_get(rq);
+ }
+ }
+ }
+ spin_unlock_irq(&b->rb_lock);
+
+ if (!list_empty(&list)) {
+ local_bh_disable();
+ list_for_each_entry_safe(rq, n, &list, signaling.link) {
+ dma_fence_signal(&rq->fence);
+ GEM_BUG_ON(!i915_request_completed(rq));
+ i915_request_put(rq);
}
+ local_bh_enable(); /* kick start the tasklets */
- /* If the engine is saturated we may be continually
+ /*
+ * If the engine is saturated we may be continually
* processing completed requests. This angers the
* NMI watchdog if we never let anything else
* have access to the CPU. Let's pretend to be nice
*/
do_schedule = need_resched();
}
- i915_request_put(request);
if (unlikely(do_schedule)) {
+ /* Before we sleep, check for a missed seqno */
+ if (current->state & TASK_NORMAL &&
+ !list_empty(&b->signals) &&
+ engine->irq_seqno_barrier &&
+ test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+ &engine->irq_posted)) {
+ engine->irq_seqno_barrier(engine);
+ intel_engine_wakeup(engine);
+ }
+
+sleep:
if (kthread_should_park())
kthread_parkme();
return 0;
}
+static void insert_signal(struct intel_breadcrumbs *b,
+ struct i915_request *request,
+ const u32 seqno)
+{
+ struct i915_request *iter;
+
+ lockdep_assert_held(&b->rb_lock);
+
+ /*
+ * A reasonable assumption is that we are called to add signals
+ * in sequence, as the requests are submitted for execution and
+ * assigned a global_seqno. This will be the case for the majority
+ * of internally generated signals (inter-engine signaling).
+ *
+ * Out of order waiters triggering random signaling enabling will
+ * be more problematic, but hopefully rare enough and the list
+ * small enough that the O(N) insertion sort is not an issue.
+ */
+
+ list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+ if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
+ break;
+
+ list_add(&request->signaling.link, &iter->signaling.link);
+}
+
void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
{
struct intel_engine_cs *engine = request->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
u32 seqno;
- /* Note that we may be called from an interrupt handler on another
+ /*
+ * Note that we may be called from an interrupt handler on another
* device (e.g. nouveau signaling a fence completion causing us
* to submit a request, and so enable signaling). As such,
* we need to make sure that all other users of b->rb_lock protect
lockdep_assert_held(&request->lock);
seqno = i915_request_global_seqno(request);
- if (!seqno)
+ if (!seqno) /* will be enabled later upon execution */
return;
- spin_lock(&b->rb_lock);
-
GEM_BUG_ON(request->signaling.wait.seqno);
request->signaling.wait.tsk = b->signaler;
request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno;
- /* First add ourselves into the list of waiters, but register our
+ /*
+ * Add ourselves into the list of waiters, but registering our
* bottom-half as the signaller thread. As per usual, only the oldest
* waiter (not just signaller) is tasked as the bottom-half waking
* up all completed waiters after the user interrupt.
* If we are the oldest waiter, enable the irq (after which we
* must double check that the seqno did not complete).
*/
+ spin_lock(&b->rb_lock);
+ insert_signal(b, request, seqno);
wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
-
- if (!__i915_request_completed(request, seqno)) {
- struct rb_node *parent, **p;
- bool first;
-
- /* Now insert ourselves into the retirement ordered list of
- * signals on this engine. We track the oldest seqno as that
- * will be the first signal to complete.
- */
- parent = NULL;
- first = true;
- p = &b->signals.rb_node;
- while (*p) {
- parent = *p;
- if (i915_seqno_passed(seqno,
- to_signaler(parent)->signaling.wait.seqno)) {
- p = &parent->rb_right;
- first = false;
- } else {
- p = &parent->rb_left;
- }
- }
- rb_link_node(&request->signaling.node, parent, p);
- rb_insert_color(&request->signaling.node, &b->signals);
- if (first)
- rcu_assign_pointer(b->first_signal, request);
- } else {
- __intel_engine_remove_wait(engine, &request->signaling.wait);
- request->signaling.wait.seqno = 0;
- wakeup = false;
- }
-
spin_unlock(&b->rb_lock);
if (wakeup)
void intel_engine_cancel_signaling(struct i915_request *request)
{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
- if (READ_ONCE(request->signaling.wait.seqno)) {
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ if (!READ_ONCE(request->signaling.wait.seqno))
+ return;
- spin_lock(&b->rb_lock);
- __intel_engine_remove_signal(engine, request);
- spin_unlock(&b->rb_lock);
- }
+ spin_lock(&b->rb_lock);
+ __intel_engine_remove_wait(engine, &request->signaling.wait);
+ if (fetch_and_zero(&request->signaling.wait.seqno))
+ __list_del_entry(&request->signaling.link);
+ spin_unlock(&b->rb_lock);
}
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
+ INIT_LIST_HEAD(&b->signals);
+
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
* task for handling the bottom-half to the user interrupt, therefore
/* The engines should be idle and all requests accounted for! */
WARN_ON(READ_ONCE(b->irq_wait));
WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
- WARN_ON(rcu_access_pointer(b->first_signal));
- WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+ WARN_ON(!list_empty(&b->signals));
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool busy = false;
- spin_lock_irq(&b->rb_lock);
-
if (b->irq_wait) {
- wake_up_process(b->irq_wait->tsk);
- busy = true;
+ spin_lock_irq(&b->irq_lock);
+
+ if (b->irq_wait) {
+ wake_up_process(b->irq_wait->tsk);
+ busy = true;
+ }
+
+ spin_unlock_irq(&b->irq_lock);
}
- if (rcu_access_pointer(b->first_signal)) {
+ if (!busy && !list_empty(&b->signals)) {
wake_up_process(b->signaler);
busy = true;
}
- spin_unlock_irq(&b->rb_lock);
-
return busy;
}