drm/i915/execlists: Pack the count into the low bits of the port.request
author     Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 17 May 2017 12:10:00 +0000 (13:10 +0100)
committer  Chris Wilson <chris@chris-wilson.co.uk>
           Wed, 17 May 2017 12:38:06 +0000 (13:38 +0100)
add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improvement to
intel_lrc_irq_handler and execlists_submit_ports (a net improvement,
since execlists_update_context is now inlined).

v2: Use the port_*() accessors for the guc as well (even though we do
not pack any counters in there yet) and hide all port->request_count
accesses inside the helpers.
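
For anyone unfamiliar with the trick named in the subject, a minimal,
self-contained sketch of what "packing a count into the low bits of a
pointer" means is shown below. This is only an illustration (the names
pack()/unpack()/COUNT_BITS are invented for it); the driver builds on
the existing ptr_pack_bits()/ptr_unpack_bits() helpers behind the new
port_*() macros added to intel_ringbuffer.h.

    #include <assert.h>
    #include <stdint.h>

    #define COUNT_BITS 2                        /* mirrors EXECLIST_COUNT_BITS */
    #define COUNT_MASK ((1ul << COUNT_BITS) - 1)

    /* Store a small count in the otherwise-zero low bits of an aligned pointer. */
    static inline void *pack(void *ptr, unsigned long count)
    {
            assert(((uintptr_t)ptr & COUNT_MASK) == 0); /* alignment frees the bits */
            assert(count <= COUNT_MASK);
            return (void *)((uintptr_t)ptr | count);
    }

    /* Recover the original pointer and the count from a single load. */
    static inline void *unpack(void *packed, unsigned long *count)
    {
            *count = (uintptr_t)packed & COUNT_MASK;
            return (void *)((uintptr_t)packed & ~(uintptr_t)COUNT_MASK);
    }

Since request pointers come from the slab allocator they are at least
4-byte aligned, so the two low bits are free to carry the ELSP
submission count and each port collapses to a single pointer-sized
field (request_count) that is read and written in one go.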

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gpu_error.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_ringbuffer.h

drivers/gpu/drm/i915/i915_debugfs.c
index 76abff1..e08ac70 100644
@@ -3353,6 +3353,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                if (i915.enable_execlists) {
                        u32 ptr, read, write;
                        struct rb_node *rb;
+                       unsigned int idx;
 
                        seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
                                   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3370,8 +3371,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        if (read > write)
                                write += GEN8_CSB_ENTRIES;
                        while (read < write) {
-                               unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+                               idx = ++read % GEN8_CSB_ENTRIES;
                                seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
                                           idx,
                                           I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3379,21 +3379,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        }
 
                        rcu_read_lock();
-                       rq = READ_ONCE(engine->execlist_port[0].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[0] count=%d, ",
-                                          engine->execlist_port[0].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[0] idle\n");
-                       }
-                       rq = READ_ONCE(engine->execlist_port[1].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[1] count=%d, ",
-                                          engine->execlist_port[1].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[1] idle\n");
+                       for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+                               unsigned int count;
+
+                               rq = port_unpack(&engine->execlist_port[idx],
+                                                &count);
+                               if (rq) {
+                                       seq_printf(m, "\t\tELSP[%d] count=%d, ",
+                                                  idx, count);
+                                       print_request(m, rq, "rq: ");
+                               } else {
+                                       seq_printf(m, "\t\tELSP[%d] idle\n",
+                                                  idx);
+                               }
                        }
                        rcu_read_unlock();
 
drivers/gpu/drm/i915/i915_gem.c
index b63c3d1..75d7575 100644
@@ -3019,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
         */
 
        if (i915.enable_execlists) {
+               struct execlist_port *port = engine->execlist_port;
                unsigned long flags;
+               unsigned int n;
 
                spin_lock_irqsave(&engine->timeline->lock, flags);
 
-               i915_gem_request_put(engine->execlist_port[0].request);
-               i915_gem_request_put(engine->execlist_port[1].request);
+               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+                       i915_gem_request_put(port_request(&port[n]));
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
                engine->execlist_queue = RB_ROOT;
                engine->execlist_first = NULL;
drivers/gpu/drm/i915/i915_gpu_error.c
index ec526d9..e18f350 100644
@@ -1324,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
                                          struct drm_i915_error_engine *ee)
 {
+       const struct execlist_port *port = engine->execlist_port;
        unsigned int n;
 
-       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-               if (engine->execlist_port[n].request)
-                       record_request(engine->execlist_port[n].request,
-                                      &ee->execlist[n]);
+       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+               struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+               if (!rq)
+                       break;
+
+               record_request(rq, &ee->execlist[n]);
+       }
 }
 
 static void record_context(struct drm_i915_error_context *e,
drivers/gpu/drm/i915/i915_guc_submission.c
index 7e85b5a..014cbd1 100644
@@ -653,10 +653,22 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
        spin_unlock(&rq->lock);
 }
 
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, i915_gem_request_get(rq));
+       nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
        struct execlist_port *port = engine->execlist_port;
-       struct drm_i915_gem_request *last = port[0].request;
+       struct drm_i915_gem_request *last = port_request(port);
        struct rb_node *rb;
        bool submit = false;
 
@@ -670,8 +682,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
                        if (port != engine->execlist_port)
                                break;
 
-                       i915_gem_request_assign(&port->request, last);
-                       nested_enable_signaling(last);
+                       port_assign(port, last);
                        port++;
                }
 
@@ -681,13 +692,12 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
                rq->priotree.priority = INT_MAX;
 
                i915_guc_submit(rq);
-               trace_i915_gem_request_in(rq, port - engine->execlist_port);
+               trace_i915_gem_request_in(rq, port_index(port, engine));
                last = rq;
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
-               nested_enable_signaling(last);
+               port_assign(port, last);
                engine->execlist_first = rb;
        }
        spin_unlock_irq(&engine->timeline->lock);
@@ -703,17 +713,19 @@ static void i915_guc_irq_handler(unsigned long data)
        bool submit;
 
        do {
-               rq = port[0].request;
+               rq = port_request(&port[0]);
                while (rq && i915_gem_request_completed(rq)) {
                        trace_i915_gem_request_out(rq);
                        i915_gem_request_put(rq);
-                       port[0].request = port[1].request;
-                       port[1].request = NULL;
-                       rq = port[0].request;
+
+                       port[0] = port[1];
+                       memset(&port[1], 0, sizeof(port[1]));
+
+                       rq = port_request(&port[0]);
                }
 
                submit = false;
-               if (!port[1].request)
+               if (!port_count(&port[1]))
                        submit = i915_guc_dequeue(engine);
        } while (submit);
 }
drivers/gpu/drm/i915/intel_engine_cs.c
index 49c2315..e312dec 100644
@@ -1233,7 +1233,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
                return false;
 
        /* Both ports drained, no more ELSP submission? */
-       if (engine->execlist_port[0].request)
+       if (port_request(&engine->execlist_port[0]))
                return false;
 
        /* Ring stopped? */
drivers/gpu/drm/i915/intel_lrc.c
index 9a1192d..53ec0d5 100644
@@ -337,39 +337,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
        struct execlist_port *port = engine->execlist_port;
        u32 __iomem *elsp =
-               dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-       u64 desc[2];
-
-       GEM_BUG_ON(port[0].count > 1);
-       if (!port[0].count)
-               execlists_context_status_change(port[0].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-       desc[0] = execlists_update_context(port[0].request);
-       GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-       port[0].count++;
-
-       if (port[1].request) {
-               GEM_BUG_ON(port[1].count);
-               execlists_context_status_change(port[1].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-               desc[1] = execlists_update_context(port[1].request);
-               GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-               port[1].count = 1;
-       } else {
-               desc[1] = 0;
-       }
-       GEM_BUG_ON(desc[0] == desc[1]);
+               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+       unsigned int n;
 
-       /* You must always write both descriptors in the order below. */
-       writel(upper_32_bits(desc[1]), elsp);
-       writel(lower_32_bits(desc[1]), elsp);
+       for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+               struct drm_i915_gem_request *rq;
+               unsigned int count;
+               u64 desc;
+
+               rq = port_unpack(&port[n], &count);
+               if (rq) {
+                       GEM_BUG_ON(count > !n);
+                       if (!count++)
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+                       port_set(&port[n], port_pack(rq, count));
+                       desc = execlists_update_context(rq);
+                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+               } else {
+                       GEM_BUG_ON(!n);
+                       desc = 0;
+               }
 
-       writel(upper_32_bits(desc[0]), elsp);
-       /* The context is automatically loaded after the following */
-       writel(lower_32_bits(desc[0]), elsp);
+               writel(upper_32_bits(desc), elsp);
+               writel(lower_32_bits(desc), elsp);
+       }
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -390,6 +383,17 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
        return true;
 }
 
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
        struct rb_node *rb;
        bool submit = false;
 
-       last = port->request;
+       last = port_request(port);
        if (last)
                /* WaIdleLiteRestore:bdw,skl
                 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 */
                last->tail = last->wa_tail;
 
-       GEM_BUG_ON(port[1].request);
+       GEM_BUG_ON(port_isset(&port[1]));
 
        /* Hardware submission is through 2 ports. Conceptually each port
         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
                        GEM_BUG_ON(last->ctx == cursor->ctx);
 
-                       i915_gem_request_assign(&port->request, last);
+                       if (submit)
+                               port_assign(port, last);
                        port++;
                }
 
@@ -474,12 +479,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                cursor->priotree.priority = INT_MAX;
 
                __i915_gem_request_submit(cursor);
-               trace_i915_gem_request_in(cursor, port - engine->execlist_port);
+               trace_i915_gem_request_in(cursor, port_index(port, engine));
                last = cursor;
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
+               port_assign(port, last);
                engine->execlist_first = rb;
        }
        spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                execlists_submit_ports(engine);
 }
 
-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-       return !engine->execlist_port[0].request;
-}
-
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
        const struct execlist_port *port = engine->execlist_port;
 
-       return port[0].count + port[1].count < 2;
+       return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                tail = GEN8_CSB_WRITE_PTR(head);
                head = GEN8_CSB_READ_PTR(head);
                while (head != tail) {
+                       struct drm_i915_gem_request *rq;
                        unsigned int status;
+                       unsigned int count;
 
                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
@@ -575,22 +577,26 @@ static void intel_lrc_irq_handler(unsigned long data)
 
                        /* Check the context/desc id for this event matches */
                        GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
-                                        port[0].context_id);
+                                        port->context_id);
 
-                       GEM_BUG_ON(port[0].count == 0);
-                       if (--port[0].count == 0) {
+                       rq = port_unpack(port, &count);
+                       GEM_BUG_ON(count == 0);
+                       if (--count == 0) {
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-                               execlists_context_status_change(port[0].request,
-                                                               INTEL_CONTEXT_SCHEDULE_OUT);
+                               GEM_BUG_ON(!i915_gem_request_completed(rq));
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+                               trace_i915_gem_request_out(rq);
+                               i915_gem_request_put(rq);
 
-                               trace_i915_gem_request_out(port[0].request);
-                               i915_gem_request_put(port[0].request);
                                port[0] = port[1];
                                memset(&port[1], 0, sizeof(port[1]));
+                       } else {
+                               port_set(port, port_pack(rq, count));
                        }
 
-                       GEM_BUG_ON(port[0].count == 0 &&
+                       /* After the final element, the hw should be idle */
+                       GEM_BUG_ON(port_count(port) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
                }
 
@@ -1147,6 +1153,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
        struct drm_i915_private *dev_priv = engine->i915;
        struct execlist_port *port = engine->execlist_port;
        unsigned int n;
+       bool submit;
        int ret;
 
        ret = intel_mocs_init_engine(engine);
@@ -1168,19 +1175,21 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
        /* After a GPU reset, we may have requests to replay */
        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+       submit = false;
        for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
-               if (!port[n].request)
+               if (!port_isset(&port[n]))
                        break;
 
                DRM_DEBUG_DRIVER("Restarting %s:%d from 0x%x\n",
                                 engine->name, n,
-                                port[n].request->global_seqno);
+                                port_request(&port[n])->global_seqno);
 
                /* Discard the current inflight count */
-               port[n].count = 0;
+               port_set(&port[n], port_request(&port[n]));
+               submit = true;
        }
 
-       if (!i915.enable_guc_submission && !execlists_elsp_idle(engine))
+       if (submit && !i915.enable_guc_submission)
                execlists_submit_ports(engine);
 
        return 0;
@@ -1258,13 +1267,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
        intel_ring_update_space(request->ring);
 
        /* Catch up with any missed context-switch interrupts */
-       if (request->ctx != port[0].request->ctx) {
-               i915_gem_request_put(port[0].request);
+       if (request->ctx != port_request(port)->ctx) {
+               i915_gem_request_put(port_request(port));
                port[0] = port[1];
                memset(&port[1], 0, sizeof(port[1]));
        }
 
-       GEM_BUG_ON(request->ctx != port[0].request->ctx);
+       GEM_BUG_ON(request->ctx != port_request(port)->ctx);
 
        /* Reset WaIdleLiteRestore:bdw,skl as well */
        request->tail =
drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6..162f0a9 100644
@@ -368,8 +368,15 @@ struct intel_engine_cs {
        /* Execlists */
        struct tasklet_struct irq_tasklet;
        struct execlist_port {
-               struct drm_i915_gem_request *request;
-               unsigned int count;
+               struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
                GEM_DEBUG_DECL(u32 context_id);
        } execlist_port[2];
        struct rb_root execlist_queue;