struct intel_engine_cs *engine,
struct intel_ring *ring);
+/*
+ * Convert an rbtree node into the i915_priolist that embeds it as its
+ * 'node' member.
+ */
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+	struct i915_priolist *plist;
+
+	plist = rb_entry(rb, struct i915_priolist, node);
+	return plist;
+}
+
+/* Read the priority stored in the request's priotree. */
+static inline int rq_prio(const struct i915_request *rq)
+{
+	const int prio = rq->priotree.priority;
+
+	return prio;
+}
+
+/*
+ * Decide whether a pending priority justifies preempting @last.
+ *
+ * Returns true only if the device has a preempt context and @prio is
+ * strictly greater than both the priority of @last and zero.
+ */
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+				const struct i915_request *last,
+				int prio)
+{
+	/* No preempt context: the priority of @last is never consulted. */
+	if (!engine->i915->preempt_context)
+		return false;
+
+	return prio > max(rq_prio(last), 0);
+}
+
/**
* intel_lr_context_descriptor_update() - calculate & cache the descriptor
* descriptor for a pinned context
parent = &execlists->queue.rb_node;
while (*parent) {
rb = *parent;
- p = rb_entry(rb, typeof(*p), node);
+ p = to_priolist(rb);
if (prio > p->priority) {
parent = &rb->rb_left;
} else if (prio < p->priority) {
if (first)
execlists->first = &p->node;
- return ptr_pack_bits(p, first, 1);
+ return p;
}
static void unwind_wa_tail(struct i915_request *rq)
__i915_request_unsubmit(rq);
unwind_wa_tail(rq);
- GEM_BUG_ON(rq->priotree.priority == I915_PRIORITY_INVALID);
- if (rq->priotree.priority != last_prio) {
- p = lookup_priolist(engine,
- &rq->priotree,
- rq->priotree.priority);
- p = ptr_mask_bits(p, 1);
-
- last_prio = rq->priotree.priority;
+ GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+ if (rq_prio(rq) != last_prio) {
+ last_prio = rq_prio(rq);
+ p = lookup_priolist(engine, &rq->priotree, last_prio);
}
list_add(&rq->priotree.link, &p->requests);
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
- GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name, n,
port[n].context_id, count,
- rq->global_seqno);
+ rq->global_seqno,
+ rq_prio(rq));
} else {
GEM_BUG_ON(!n);
desc = 0;
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+ /*
+ * Switch to our empty preempt context so
+ * the state of the GPU is known (idle).
+ */
GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; )
elsp_write(0, engine->execlists.elsp);
elsp_write(ce->lrc_desc, engine->execlists.elsp);
execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
spin_lock_irq(&engine->timeline->lock);
rb = execlists->first;
GEM_BUG_ON(rb_first(&execlists->queue) != rb);
- if (!rb)
- goto unlock;
if (last) {
/*
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
goto unlock;
- if (engine->i915->preempt_context &&
- rb_entry(rb, struct i915_priolist, node)->priority >
- max(last->priotree.priority, 0)) {
- /*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
- */
+ if (need_preempt(engine, last, execlists->queue_priority)) {
inject_preempt_context(engine);
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
goto unlock;
- } else {
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work).
- */
- if (port_count(&port[1]))
- goto unlock;
-
- /* WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
+
+ /*
+ * In theory, we could coalesce more requests onto
+ * the second port (the first port is active, with
+ * no preemptions pending). However, that means we
+ * then have to deal with the possible lite-restore
+ * of the second port (as we submit the ELSP, there
+ * may be a context-switch) but also we may complete
+ * the resubmission before the context-switch. Ergo,
+ * coalescing onto the second port will cause a
+ * preemption event, but we cannot predict whether
+ * that will affect port[0] or port[1].
+ *
+ * If the second port is already active, we can wait
+ * until the next context-switch before contemplating
+ * new requests. The GPU will be busy and we should be
+ * able to resubmit the new ELSP before it idles,
+ * avoiding pipeline bubbles (momentary pauses where
+ * the driver is unable to keep up the supply of new
+ * work). However, we have to double check that the
+ * priorities of the ports haven't been switched.
+ */
+ if (port_count(&port[1]))
+ goto unlock;
+
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ * Apply the wa NOOPs to prevent
+ * ring:HEAD == rq:TAIL as we resubmit the
+ * request. See gen8_emit_breadcrumb() for
+ * where we prepare the padding after the
+ * end of the request.
+ */
+ last->tail = last->wa_tail;
}
- do {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ while (rb) {
+ struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
- } while (rb);
+ }
done:
+ execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit)
port_assign(port, last);
/* Flush the queued requests to the timeline list (for retiring). */
rb = execlists->first;
while (rb) {
- struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ struct i915_priolist *p = to_priolist(rb);
list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
INIT_LIST_HEAD(&rq->priotree.link);
/* Remaining _unready_ requests will be nop'ed when submitted */
-
+ execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
GEM_BUG_ON(port_isset(execlists->port));
EXECLISTS_ACTIVE_USER));
rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n",
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
engine->name,
port->context_id, count,
- rq ? rq->global_seqno : 0);
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
-static void insert_request(struct intel_engine_cs *engine,
- struct i915_priotree *pt,
- int prio)
+static void queue_request(struct intel_engine_cs *engine,
+ struct i915_priotree *pt,
+ int prio)
{
- struct i915_priolist *p = lookup_priolist(engine, pt, prio);
+ list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); /* append pt to the tail of the request list returned by lookup_priolist() for prio */
+}
- list_add_tail(&pt->link, &ptr_mask_bits(p, 1)->requests);
- if (ptr_unmask_bits(p, 1))
+static void submit_queue(struct intel_engine_cs *engine, int prio)
+{ /* schedule the execlists tasklet only when prio exceeds the cached queue_priority */
+ if (prio > engine->execlists.queue_priority) {
+ engine->execlists.queue_priority = prio; /* remember prio as the new queue_priority before kicking the tasklet */
tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
}
static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->timeline->lock, flags);
- insert_request(engine, &request->priotree, request->priotree.priority);
+ queue_request(engine, &request->priotree, rq_prio(request));
+ submit_queue(engine, rq_prio(request));
GEM_BUG_ON(!engine->execlists.first);
GEM_BUG_ON(list_empty(&request->priotree.link));
* static void update_priorities(struct i915_priotree *pt, prio) {
* list_for_each_entry(dep, &pt->signalers_list, signal_link)
* update_priorities(dep->signal, prio)
- * insert_request(pt);
+ * queue_request(pt);
* }
* but that may have unlimited recursion depth and so runs a very
* real risk of overunning the kernel stack. Instead, we build
pt->priority = prio;
if (!list_empty(&pt->link)) {
__list_del_entry(&pt->link);
- insert_request(engine, pt, prio);
+ queue_request(engine, pt, prio);
}
+ submit_queue(engine, prio);
}
spin_unlock_irq(&engine->timeline->lock);