drm/i915: Fix erroneous dereference of batch_obj inside reset_status
authorChris Wilson <chris@chris-wilson.co.uk>
Wed, 4 Dec 2013 11:37:09 +0000 (11:37 +0000)
committerDaniel Vetter <daniel.vetter@ffwll.ch>
Thu, 12 Dec 2013 09:49:05 +0000 (10:49 +0100)
As the rings may be processed and their requests deallocated in a
different order to the natural retirement during a reset,

/* Whilst this request exists, batch_obj will be on the
 * active_list, and so will hold the active reference. Only when this
 * request is retired will the the batch_obj be moved onto the
 * inactive_list and lose its active reference. Hence we do not need
 * to explicitly hold another reference here.
 */

is violated, and the batch_obj may be dereferenced after it had been
freed on another ring. This can be simply avoided by processing the
status update prior to deallocating any requests.

Fixes regression (a possible OOPS following a GPU hang) from
commit aa60c664e6df502578454621c3a9b1f087ff8d25
Author: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date:   Wed Jun 12 15:13:20 2013 +0300

    drm/i915: find guilty batch buffer on ring resets

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
[danvet: Add the code comment Chris supplied.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/i915_gem.c

index 621c7c67a6439b2e785fc4b0f9898784aad47b01..76d3d1ab73c6965063eba62527594dce82dc41d4 100644 (file)
@@ -2343,15 +2343,24 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request)
        kfree(request);
 }
 
-static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
-                                     struct intel_ring_buffer *ring)
+static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
+                                      struct intel_ring_buffer *ring)
 {
-       u32 completed_seqno;
-       u32 acthd;
+       u32 completed_seqno = ring->get_seqno(ring, false);
+       u32 acthd = intel_ring_get_active_head(ring);
+       struct drm_i915_gem_request *request;
+
+       list_for_each_entry(request, &ring->request_list, list) {
+               if (i915_seqno_passed(completed_seqno, request->seqno))
+                       continue;
 
-       acthd = intel_ring_get_active_head(ring);
-       completed_seqno = ring->get_seqno(ring, false);
+               i915_set_reset_status(ring, request, acthd);
+       }
+}
 
+static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
+                                       struct intel_ring_buffer *ring)
+{
        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;
 
@@ -2359,9 +2368,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
                                           struct drm_i915_gem_request,
                                           list);
 
-               if (request->seqno > completed_seqno)
-                       i915_set_reset_status(ring, request, acthd);
-
                i915_gem_free_request(request);
        }
 
@@ -2403,8 +2409,16 @@ void i915_gem_reset(struct drm_device *dev)
        struct intel_ring_buffer *ring;
        int i;
 
+       /*
+        * Before we free the objects from the requests, we need to inspect
+        * them for finding the guilty party. As the requests only borrow
+        * their reference to the objects, the inspection must be done first.
+        */
+       for_each_ring(ring, dev_priv, i)
+               i915_gem_reset_ring_status(dev_priv, ring);
+
        for_each_ring(ring, dev_priv, i)
-               i915_gem_reset_ring_lists(dev_priv, ring);
+               i915_gem_reset_ring_cleanup(dev_priv, ring);
 
        i915_gem_cleanup_ringbuffer(dev);