drm/i915: Do not overwrite the request with zero on reallocation

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 9 Aug 2016 08:23:34 +0000 (09:23 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 9 Aug 2016 09:17:27 +0000 (10:17 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 9 Aug 2016 08:23:34 +0000 (09:23 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 9 Aug 2016 09:17:27 +0000 (10:17 +0100)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index 6a16616..06d1267 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -355,7 +355,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         if (req && i915_gem_request_completed(req))
                 i915_gem_request_retire(req);
  
-       req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+       /* Beware: Dragons be flying overhead.
+        *
+        * We use RCU to look up requests in flight. The lookups may
+        * race with the request being allocated from the slab freelist.
+        * That is the request we are writing to here, may be in the process
+        * of being read by __i915_gem_active_get_request_rcu(). As such,
+        * we have to be very careful when overwriting the contents. During
+        * the RCU lookup, we change chase the request->engine pointer,
+        * read the request->fence.seqno and increment the reference count.
+        *
+        * The reference count is incremented atomically. If it is zero,
+        * the lookup knows the request is unallocated and complete. Otherwise,
+        * it is either still in use, or has been reallocated and reset
+        * with fence_init(). This increment is safe for release as we check
+        * that the request we have a reference to and matches the active
+        * request.
+        *
+        * Before we increment the refcount, we chase the request->engine
+        * pointer. We must not call kmem_cache_zalloc() or else we set
+        * that pointer to NULL and cause a crash during the lookup. If
+        * we see the request is completed (based on the value of the
+        * old engine and seqno), the lookup is complete and reports NULL.
+        * If we decide the request is not completed (new engine or seqno),
+        * then we grab a reference and double check that it is still the
+        * active request - which it won't be and restart the lookup.
+        *
+        * Do not use kmem_cache_zalloc() here!
+        */
+       req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
         if (!req)
                 return ERR_PTR(-ENOMEM);
  
@@ -375,6 +403,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         req->engine = engine;
         req->ctx = i915_gem_context_get(ctx);
  
+       /* No zalloc, must clear what we need by hand */
+       req->previous_context = NULL;
+       req->file_priv = NULL;
+       req->batch_obj = NULL;
+       req->pid = NULL;
+       req->elsp_submitted = 0;
+
         /*
          * Reserve space in the ring buffer for all the commands required to
          * eventually emit this request. This is to guarantee that the
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h

index 6dd83b1..4e91d49 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -51,6 +51,13 @@ struct intel_signal_node {
   * emission time to be associated with the request for tracking how far ahead
   * of the GPU the submission is.
   *
+ * When modifying this structure be very aware that we perform a lockless
+ * RCU lookup of it that may race against reallocation of the struct
+ * from the slab freelist. We intentionally do not zero the structure on
+ * allocation so that the lookup can use the dangling pointers (and is
+ * cogniscent that those pointers may be wrong). Instead, everything that
+ * needs to be initialised must be done so explicitly.
+ *
   * The requests are reference counted.
   */
  struct drm_i915_gem_request {
@@ -458,6 +465,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
          * just report the active tracker is idle. If the new request is
          * incomplete, then we acquire a reference on it and check that
          * it remained the active request.
+        *
+        * It is then imperative that we do not zero the request on
+        * reallocation, so that we can chase the dangling pointers!
+        * See i915_gem_request_alloc().
          */
         do {
                 struct drm_i915_gem_request *request;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 9 Aug 2016 08:23:34 +0000 (09:23 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 9 Aug 2016 09:17:27 +0000 (10:17 +0100)
drivers/gpu/drm/i915/i915_gem_request.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_request.h		patch \| blob \| history