drm/v3d: Clock V3D down when not in use.
authorEric Anholt <eric@anholt.net>
Thu, 2 May 2019 20:22:53 +0000 (13:22 -0700)
committerDom Cobley <popcornmix@gmail.com>
Mon, 19 Feb 2024 11:32:53 +0000 (11:32 +0000)
My various attempts at re-enabling runtime PM have failed, so just
crank the clock down when V3D is idle to reduce power consumption.

Signed-off-by: Eric Anholt <eric@anholt.net>
drm/v3d: Plug dma_fence leak

The irq_fence and done_fence are given a reference that is never
released. The necessary dma_fence_put()s seem to have been
deleted in error in an earlier commit.

Fixes: 0b73676836b2 ("drm/v3d: Clock V3D down when not in use.")

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
drivers/gpu/drm/v3d/v3d_drv.c
drivers/gpu/drm/v3d/v3d_drv.h
drivers/gpu/drm/v3d/v3d_gem.c

index ffbbe9d..74c6e46 100644 (file)
@@ -256,6 +256,21 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
                }
        }
 
+       v3d->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(v3d->clk)) {
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "Failed to get clock\n");
+               goto dev_free;
+       }
+       v3d->clk_up_rate = clk_get_rate(v3d->clk);
+       /* For downclocking, drop it to the minimum frequency we can get from
+        * the CPRMAN clock generator dividing off our parent.  The divider is
+        * 4 bits, but ask for just higher than that so that rounding doesn't
+        * make cprman reject our rate.
+        */
+       v3d->clk_down_rate =
+               (clk_get_rate(clk_get_parent(v3d->clk)) / (1 << 4)) + 10000;
+
        if (v3d->ver < 41) {
                ret = map_regs(v3d, &v3d->gca_regs, "gca");
                if (ret)
@@ -281,6 +296,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
        if (ret)
                goto irq_disable;
 
+       ret = clk_set_rate(v3d->clk, v3d->clk_down_rate);
+       WARN_ON_ONCE(ret != 0);
+
        return 0;
 
 irq_disable:
index 7f664a4..73ba92f 100644 (file)
@@ -76,6 +76,12 @@ struct v3d_dev {
        void __iomem *bridge_regs;
        void __iomem *gca_regs;
        struct clk *clk;
+       struct delayed_work clk_down_work;
+       unsigned long clk_up_rate, clk_down_rate;
+       struct mutex clk_lock;
+       u32 clk_refcount;
+       bool clk_up;
+
        struct reset_control *reset;
 
        /* Virtual and DMA addresses of the single shared page table. */
index 2e94ce7..e883bed 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include "v3d_trace.h"
 
 static void
+v3d_clock_down_work(struct work_struct *work)
+{
+       struct v3d_dev *v3d =
+               container_of(work, struct v3d_dev, clk_down_work.work);
+       int ret;
+
+       ret = clk_set_rate(v3d->clk, v3d->clk_down_rate);
+       v3d->clk_up = false;
+       WARN_ON_ONCE(ret != 0);
+}
+
+static void
+v3d_clock_up_get(struct v3d_dev *v3d)
+{
+       mutex_lock(&v3d->clk_lock);
+       if (v3d->clk_refcount++ == 0) {
+               cancel_delayed_work_sync(&v3d->clk_down_work);
+               if (!v3d->clk_up)  {
+                       int ret;
+
+                       ret = clk_set_rate(v3d->clk, v3d->clk_up_rate);
+                       WARN_ON_ONCE(ret != 0);
+                       v3d->clk_up = true;
+               }
+       }
+       mutex_unlock(&v3d->clk_lock);
+}
+
+static void
+v3d_clock_up_put(struct v3d_dev *v3d)
+{
+       mutex_lock(&v3d->clk_lock);
+       if (--v3d->clk_refcount == 0) {
+               schedule_delayed_work(&v3d->clk_down_work,
+                                     msecs_to_jiffies(100));
+       }
+       mutex_unlock(&v3d->clk_lock);
+}
+
+
+static void
 v3d_init_core(struct v3d_dev *v3d, int core)
 {
        /* Set OVRTMUOUT, which means that the texture sampler uniform
@@ -318,6 +360,7 @@ static void
 v3d_job_free(struct kref *ref)
 {
        struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
+       struct v3d_dev *v3d = job->v3d;
        int i;
 
        if (job->bo) {
@@ -329,6 +372,8 @@ v3d_job_free(struct kref *ref)
        dma_fence_put(job->irq_fence);
        dma_fence_put(job->done_fence);
 
+       v3d_clock_up_put(v3d);
+
        if (job->perfmon)
                v3d_perfmon_put(job->perfmon);
 
@@ -448,6 +493,7 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
                        goto fail_deps;
        }
 
+       v3d_clock_up_get(v3d);
        kref_init(&job->refcount);
 
        return 0;
@@ -1031,6 +1077,9 @@ v3d_gem_init(struct drm_device *dev)
        if (ret)
                return ret;
 
+       mutex_init(&v3d->clk_lock);
+       INIT_DELAYED_WORK(&v3d->clk_down_work, v3d_clock_down_work);
+
        /* Note: We don't allocate address 0.  Various bits of HW
         * treat 0 as special, such as the occlusion query counters
         * where 0 means "disabled".