drm/radeon: use common fence implementation for fences, v4
authorMaarten Lankhorst <maarten.lankhorst@canonical.com>
Thu, 9 Jan 2014 10:03:12 +0000 (11:03 +0100)
committerMaarten Lankhorst <maarten.lankhorst@canonical.com>
Mon, 1 Sep 2014 08:22:08 +0000 (10:22 +0200)
Changes since v1:
- Kill the sw interrupt dance, add and use
  radeon_irq_kms_sw_irq_get_delayed instead.
- Change custom wait function, lockdep complained about it.
  Holding exclusive_lock in the wait function might cause deadlocks.
  Instead do all the processing in .enable_signaling, and wait
  on the global fence_queue to pick up gpu resets.
- Process all fences in radeon_gpu_reset after reset to close a race
  with the trylock in enable_signaling.
Changes since v2:
- Small changes to work with the rewritten lockup recovery patches.
Changes since v3:
- Call radeon_fence_schedule_check when exclusive_lock cannot be
  acquired to always cause a wake up.
- Reset irqs from hangup check.
- Drop reading seqno in the callback, use cached value.
- Fix indentation in radeon_fence_default_wait
- Add a radeon_test_signaled function, drop a few test_bit calls.
- Make to_radeon_fence global.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_irq_kms.c

index 83a2461..d80dc54 100644 (file)
@@ -66,6 +66,7 @@
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
 #include <linux/hashtable.h>
+#include <linux/fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -354,17 +355,19 @@ struct radeon_fence_driver {
        /* sync_seq is protected by ring emission lock */
        uint64_t                        sync_seq[RADEON_NUM_RINGS];
        atomic64_t                      last_seq;
-       bool                            initialized;
+       bool                            initialized, delayed_irq;
        struct delayed_work             lockup_work;
 };
 
 struct radeon_fence {
+       struct fence base;
+
        struct radeon_device            *rdev;
-       struct kref                     kref;
-       /* protected by radeon_fence.lock */
        uint64_t                        seq;
        /* RB, DMA, etc. */
        unsigned                        ring;
+
+       wait_queue_t                    fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -782,6 +785,7 @@ struct radeon_irq {
 int radeon_irq_kms_init(struct radeon_device *rdev);
 void radeon_irq_kms_fini(struct radeon_device *rdev);
 void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
 void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
@@ -2308,6 +2312,7 @@ struct radeon_device {
        struct radeon_mman              mman;
        struct radeon_fence_driver      fence_drv[RADEON_NUM_RINGS];
        wait_queue_head_t               fence_queue;
+       unsigned                        fence_context;
        struct mutex                    ring_lock;
        struct radeon_ring              ring[RADEON_NUM_RINGS];
        bool                            ib_pool_ready;
@@ -2441,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
 /*
  * Cast helper
  */
-#define to_radeon_fence(p) ((struct radeon_fence *)(p))
+extern const struct fence_ops radeon_fence_ops;
+
+static inline struct radeon_fence *to_radeon_fence(struct fence *f)
+{
+       struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
+
+       if (__f->base.ops == &radeon_fence_ops)
+               return __f;
+
+       return NULL;
+}
 
 /*
  * Registers read & write functions.
index d30f1cc..e84a76e 100644 (file)
@@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev,
        for (i = 0; i < RADEON_NUM_RINGS; i++) {
                rdev->ring[i].idx = i;
        }
+       rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
 
        DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
                radeon_family_name[rdev->family], pdev->vendor, pdev->device,
index ecdba3a..af9f2d6 100644 (file)
@@ -130,15 +130,18 @@ int radeon_fence_emit(struct radeon_device *rdev,
                      struct radeon_fence **fence,
                      int ring)
 {
+       u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
+
        /* we are protected by the ring emission mutex */
        *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
        if ((*fence) == NULL) {
                return -ENOMEM;
        }
-       kref_init(&((*fence)->kref));
        (*fence)->rdev = rdev;
-       (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+       (*fence)->seq = seq;
        (*fence)->ring = ring;
+       fence_init(&(*fence)->base, &radeon_fence_ops,
+                  &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
        radeon_fence_ring_emit(rdev, ring, *fence);
        trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
        radeon_fence_schedule_check(rdev, ring);
@@ -146,6 +149,41 @@ int radeon_fence_emit(struct radeon_device *rdev,
 }
 
 /**
+ * radeon_fence_check_signaled - callback from fence_queue
+ *
+ * this function is called with fence_queue lock held, which is also used
+ * for the fence locking itself, so unlocked variants are used for
+ * fence_signal, and remove_wait_queue.
+ */
+static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+       struct radeon_fence *fence;
+       u64 seq;
+
+       fence = container_of(wait, struct radeon_fence, fence_wake);
+
+       /*
+        * We cannot use radeon_fence_process here because we're already
+        * in the waitqueue, in a call from wake_up_all.
+        */
+       seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
+       if (seq >= fence->seq) {
+               int ret = fence_signal_locked(&fence->base);
+
+               if (!ret)
+                       FENCE_TRACE(&fence->base, "signaled from irq context\n");
+               else
+                       FENCE_TRACE(&fence->base, "was already signaled\n");
+
+               radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
+               __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
+               fence_put(&fence->base);
+       } else
+               FENCE_TRACE(&fence->base, "pending\n");
+       return 0;
+}
+
+/**
  * radeon_fence_activity - check for fence activity
  *
  * @rdev: radeon_device pointer
@@ -242,6 +280,15 @@ static void radeon_fence_check_lockup(struct work_struct *work)
                return;
        }
 
+       if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
+               unsigned long irqflags;
+
+               fence_drv->delayed_irq = false;
+               spin_lock_irqsave(&rdev->irq.lock, irqflags);
+               radeon_irq_set(rdev);
+               spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+       }
+
        if (radeon_fence_activity(rdev, ring))
                wake_up_all(&rdev->fence_queue);
 
@@ -276,21 +323,6 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 }
 
 /**
- * radeon_fence_destroy - destroy a fence
- *
- * @kref: fence kref
- *
- * Frees the fence object (all asics).
- */
-static void radeon_fence_destroy(struct kref *kref)
-{
-       struct radeon_fence *fence;
-
-       fence = container_of(kref, struct radeon_fence, kref);
-       kfree(fence);
-}
-
-/**
  * radeon_fence_seq_signaled - check if a fence sequence number has signaled
  *
  * @rdev: radeon device pointer
@@ -318,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
        return false;
 }
 
+static bool radeon_fence_is_signaled(struct fence *f)
+{
+       struct radeon_fence *fence = to_radeon_fence(f);
+       struct radeon_device *rdev = fence->rdev;
+       unsigned ring = fence->ring;
+       u64 seq = fence->seq;
+
+       if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+               return true;
+       }
+
+       if (down_read_trylock(&rdev->exclusive_lock)) {
+               radeon_fence_process(rdev, ring);
+               up_read(&rdev->exclusive_lock);
+
+               if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+                       return true;
+               }
+       }
+       return false;
+}
+
+/**
+ * radeon_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool radeon_fence_enable_signaling(struct fence *f)
+{
+       struct radeon_fence *fence = to_radeon_fence(f);
+       struct radeon_device *rdev = fence->rdev;
+
+       if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
+               return false;
+
+       if (down_read_trylock(&rdev->exclusive_lock)) {
+               radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+
+               if (radeon_fence_activity(rdev, fence->ring))
+                       wake_up_all_locked(&rdev->fence_queue);
+
+               /* did fence get signaled after we enabled the sw irq? */
+               if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
+                       radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+                       up_read(&rdev->exclusive_lock);
+                       return false;
+               }
+
+               up_read(&rdev->exclusive_lock);
+       } else {
+               /* we're probably in a lockup, lets not fiddle too much */
+               if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
+                       rdev->fence_drv[fence->ring].delayed_irq = true;
+               radeon_fence_schedule_check(rdev, fence->ring);
+       }
+
+       fence->fence_wake.flags = 0;
+       fence->fence_wake.private = NULL;
+       fence->fence_wake.func = radeon_fence_check_signaled;
+       __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
+       fence_get(f);
+
+       FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
+       return true;
+}
+
 /**
  * radeon_fence_signaled - check if a fence has signaled
  *
@@ -330,8 +431,15 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 {
        if (!fence)
                return true;
-       if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
+
+       if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
+               int ret;
+
+               ret = fence_signal(&fence->base);
+               if (!ret)
+                       FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
                return true;
+       }
        return false;
 }
 
@@ -433,17 +541,15 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
        uint64_t seq[RADEON_NUM_RINGS] = {};
        long r;
 
-       if (fence == NULL) {
-               WARN(1, "Querying an invalid fence : %p !\n", fence);
-               return -EINVAL;
-       }
-
        seq[fence->ring] = fence->seq;
        r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
        if (r < 0) {
                return r;
        }
 
+       r = fence_signal(&fence->base);
+       if (!r)
+               FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
        return 0;
 }
 
@@ -557,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  */
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
-       kref_get(&fence->kref);
+       fence_get(&fence->base);
        return fence;
 }
 
@@ -574,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
        *fence = NULL;
        if (tmp) {
-               kref_put(&tmp->kref, radeon_fence_destroy);
+               fence_put(&tmp->base);
        }
 }
 
@@ -887,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
        return 0;
 #endif
 }
+
+static const char *radeon_fence_get_driver_name(struct fence *fence)
+{
+       return "radeon";
+}
+
+static const char *radeon_fence_get_timeline_name(struct fence *f)
+{
+       struct radeon_fence *fence = to_radeon_fence(f);
+       switch (fence->ring) {
+       case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
+       case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
+       case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
+       case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
+       case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
+       case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
+       case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
+       case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
+       default: WARN_ON_ONCE(1); return "radeon.unk";
+       }
+}
+
+static inline bool radeon_test_signaled(struct radeon_fence *fence)
+{
+       return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
+}
+
+static signed long radeon_fence_default_wait(struct fence *f, bool intr,
+                                            signed long t)
+{
+       struct radeon_fence *fence = to_radeon_fence(f);
+       struct radeon_device *rdev = fence->rdev;
+       bool signaled;
+
+       fence_enable_sw_signaling(&fence->base);
+
+       /*
+        * This function has to return -EDEADLK, but cannot hold
+        * exclusive_lock during the wait because some callers
+        * may already hold it. This means checking needs_reset without
+        * lock, and not fiddling with any gpu internals.
+        *
+        * The callback installed with fence_enable_sw_signaling will
+        * run before our wait_event_*timeout call, so we will see
+        * both the signaled fence and the changes to needs_reset.
+        */
+
+       if (intr)
+               t = wait_event_interruptible_timeout(rdev->fence_queue,
+                       ((signaled = radeon_test_signaled(fence)) ||
+                        rdev->needs_reset), t);
+       else
+               t = wait_event_timeout(rdev->fence_queue,
+                       ((signaled = radeon_test_signaled(fence)) ||
+                        rdev->needs_reset), t);
+
+       if (t > 0 && !signaled)
+               return -EDEADLK;
+       return t;
+}
+
+const struct fence_ops radeon_fence_ops = {
+       .get_driver_name = radeon_fence_get_driver_name,
+       .get_timeline_name = radeon_fence_get_timeline_name,
+       .enable_signaling = radeon_fence_enable_signaling,
+       .signaled = radeon_fence_is_signaled,
+       .wait = radeon_fence_default_wait,
+       .release = NULL,
+};
index f0bff4b..7784911 100644 (file)
@@ -324,6 +324,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 }
 
 /**
+ * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt you want to enable
+ *
+ * Enables the software interrupt for a specific ring (all asics).
+ * The software interrupt is generally used to signal a fence on
+ * a particular ring.
+ */
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
+{
+       return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1;
+}
+
+/**
  * radeon_irq_kms_sw_irq_put - disable software interrupt
  *
  * @rdev: radeon device pointer