Merge tag 'v3.12-rc2' into drm-intel-next

[platform/kernel/linux-stable.git] / drivers / gpu / drm / i915 / i915_irq.c
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index a9233e2..b356dc1 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1483,6 +1483,34 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
         return ret;
  }
  
+static void i915_error_wake_up(struct drm_i915_private *dev_priv,
+                              bool reset_completed)
+{
+       struct intel_ring_buffer *ring;
+       int i;
+
+       /*
+        * Notify all waiters for GPU completion events that reset state has
+        * been changed, and that they need to restart their wait after
+        * checking for potential errors (and bail out to drop locks if there is
+        * a gpu reset pending so that i915_error_work_func can acquire them).
+        */
+
+       /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
+       for_each_ring(ring, dev_priv, i)
+               wake_up_all(&ring->irq_queue);
+
+       /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
+       wake_up_all(&dev_priv->pending_flip_queue);
+
+       /*
+        * Signal tasks blocked in i915_gem_wait_for_error that the pending
+        * reset state is cleared.
+        */
+       if (reset_completed)
+               wake_up_all(&dev_priv->gpu_error.reset_queue);
+}
+
  /**
   * i915_error_work_func - do process context error handling work
   * @work: work struct
@@ -1497,11 +1525,10 @@ static void i915_error_work_func(struct work_struct *work)
         drm_i915_private_t *dev_priv = container_of(error, drm_i915_private_t,
                                                     gpu_error);
         struct drm_device *dev = dev_priv->dev;
-       struct intel_ring_buffer *ring;
         char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
         char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
         char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
-       int i, ret;
+       int ret;
  
         kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
  
@@ -1520,8 +1547,16 @@ static void i915_error_work_func(struct work_struct *work)
                 kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE,
                                    reset_event);
  
+               /*
+                * All state reset _must_ be completed before we update the
+                * reset counter, for otherwise waiters might miss the reset
+                * pending state and not properly drop locks, resulting in
+                * deadlocks with the reset work.
+                */
                 ret = i915_reset(dev);
  
+               intel_display_handle_reset(dev);
+
                 if (ret == 0) {
                         /*
                          * After all the gem state is reset, increment the reset
@@ -1542,12 +1577,11 @@ static void i915_error_work_func(struct work_struct *work)
                         atomic_set(&error->reset_counter, I915_WEDGED);
                 }
  
-               for_each_ring(ring, dev_priv, i)
-                       wake_up_all(&ring->irq_queue);
-
-               intel_display_handle_reset(dev);
-
-               wake_up_all(&dev_priv->gpu_error.reset_queue);
+               /*
+                * Note: The wake_up also serves as a memory barrier so that
+                * waiters see the updated value of the reset counter atomic_t.
+                */
+               i915_error_wake_up(dev_priv, true);
         }
  }
  
@@ -1656,8 +1690,6 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
  void i915_handle_error(struct drm_device *dev, bool wedged)
  {
         struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_ring_buffer *ring;
-       int i;
  
         i915_capture_error_state(dev);
         i915_report_and_clear_eir(dev);
@@ -1667,14 +1699,28 @@ void i915_handle_error(struct drm_device *dev, bool wedged)
                                 &dev_priv->gpu_error.reset_counter);
  
                 /*
-                * Wakeup waiting processes so that the reset work item
-                * doesn't deadlock trying to grab various locks.
+                * Wakeup waiting processes so that the reset work function
+                * i915_error_work_func doesn't deadlock trying to grab various
+                * locks. By bumping the reset counter first, the woken
+                * processes will see a reset in progress and back off,
+                * releasing their locks and then wait for the reset completion.
+                * We must do this for _all_ gpu waiters that might hold locks
+                * that the reset work needs to acquire.
+                *
+                * Note: The wake_up serves as the required memory barrier to
+                * ensure that the waiters see the updated value of the reset
+                * counter atomic_t.
                  */
-               for_each_ring(ring, dev_priv, i)
-                       wake_up_all(&ring->irq_queue);
+               i915_error_wake_up(dev_priv, false);
         }
  
-       queue_work(dev_priv->wq, &dev_priv->gpu_error.work);
+       /*
+        * Our reset work can grab modeset locks (since it needs to reset the
+        * state of outstanding pageflips). Hence it must not be run on our own
+        * dev_priv->wq work queue for otherwise the flush_work in the pageflip
+        * code will deadlock.
+        */
+       schedule_work(&dev_priv->gpu_error.work);
  }
  
  static void __always_unused i915_pageflip_stall_check(struct drm_device *dev, int pipe)