drm/msm/a6xx: Make GMU reset useful

author Jordan Crouse <jcrouse@codeaurora.org>

Mon, 4 Feb 2019 16:15:42 +0000 (09:15 -0700)

committer Rob Clark <robdclark@chromium.org>

Fri, 19 Apr 2019 18:50:06 +0000 (11:50 -0700)
author Jordan Crouse <jcrouse@codeaurora.org>
Mon, 4 Feb 2019 16:15:42 +0000 (09:15 -0700)
committer Rob Clark <robdclark@chromium.org>
Fri, 19 Apr 2019 18:50:06 +0000 (11:50 -0700)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c

index f804cfe..b3bc2a5 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -10,6 +10,24 @@
  #include "a6xx_gpu.h"
  #include "a6xx_gmu.xml.h"
  
+static void a6xx_gmu_fault(struct a6xx_gmu *gmu)
+{
+       struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+       struct msm_gpu *gpu = &adreno_gpu->base;
+       struct drm_device *dev = gpu->dev;
+       struct msm_drm_private *priv = dev->dev_private;
+
+       /* FIXME: add a banner here */
+       gmu->hung = true;
+
+       /* Turn off the hangcheck timer while we are resetting */
+       del_timer(&gpu->hangcheck_timer);
+
+       /* Queue the GPU handler because we need to treat this as a recovery */
+       queue_work(priv->wq, &gpu->recover_work);
+}
+
  static irqreturn_t a6xx_gmu_irq(int irq, void *data)
  {
         struct a6xx_gmu *gmu = data;
@@ -21,8 +39,7 @@ static irqreturn_t a6xx_gmu_irq(int irq, void *data)
         if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE) {
                 dev_err_ratelimited(gmu->dev, "GMU watchdog expired\n");
  
-               /* Temporary until we can recover safely */
-               BUG();
+               a6xx_gmu_fault(gmu);
         }
  
         if (status &  A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR)
@@ -46,8 +63,7 @@ static irqreturn_t a6xx_hfi_irq(int irq, void *data)
         if (status & A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) {
                 dev_err_ratelimited(gmu->dev, "GMU firmware fault\n");
  
-               /* Temporary until we can recover safely */
-               BUG();
+               a6xx_gmu_fault(gmu);
         }
  
         return IRQ_HANDLED;
@@ -166,10 +182,8 @@ static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu)
  }
  
  /* Wait for the GMU to get to its most idle state */
-int a6xx_gmu_wait_for_idle(struct a6xx_gpu *a6xx_gpu)
+int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu)
  {
-       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
-
         return spin_until(a6xx_gmu_check_idle_level(gmu));
  }
  
@@ -568,7 +582,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
                 if (!rpmh_init) {
                         a6xx_gmu_rpmh_init(gmu);
                         rpmh_init = true;
-               } else if (state != GMU_RESET) {
+               } else {
                         ret = a6xx_rpmh_start(gmu);
                         if (ret)
                                 return ret;
@@ -657,10 +671,9 @@ static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu)
         gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0);
  }
  
-int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu)
+/* Force the GMU off in case it isn't responsive */
+static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
  {
-       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
-       int ret;
         u32 val;
  
         /* Flush all the queues */
@@ -681,44 +694,6 @@ int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu)
                 (val & 1), 100, 10000);
         gmu_poll_timeout(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS, val,
                 (val & 1), 100, 1000);
-
-       /*
-        * Depending on the state of the GMU at this point the GX domain might
-        * have been left on. Hardware sequencing rules state that the GX has to
-        * be turned off before the CX domain so this is that one time that
-        * that calling pm_runtime_put_sync() is expected to do something useful
-        * (turn off the headswitch)
-        */
-       if (!IS_ERR(gmu->gxpd))
-               pm_runtime_put_sync(gmu->gxpd);
-
-       /* Disable the resources */
-       clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
-       pm_runtime_put_sync(gmu->dev);
-
-       /* Re-enable the resources */
-       pm_runtime_get_sync(gmu->dev);
-
-       /* Use a known rate to bring up the GMU */
-       clk_set_rate(gmu->core_clk, 200000000);
-       ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
-       if (ret)
-               goto out;
-
-       a6xx_gmu_irq_enable(gmu);
-
-       ret = a6xx_gmu_fw_start(gmu, GMU_RESET);
-       if (!ret)
-               ret = a6xx_hfi_start(gmu, GMU_COLD_BOOT);
-
-       /* Set the GPU back to the highest power frequency */
-       __a6xx_gmu_set_freq(gmu, gmu->nr_gpu_freqs - 1);
-
-out:
-       if (ret)
-               a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER);
-
-       return ret;
  }
  
  int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
@@ -731,6 +706,8 @@ int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
         if (WARN(!gmu->mmio, "The GMU is not set up yet\n"))
                 return 0;
  
+       gmu->hung = false;
+
         /* Turn on the resources */
         pm_runtime_get_sync(gmu->dev);
  
@@ -789,11 +766,9 @@ bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
         return true;
  }
  
-int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
+/* Gracefully try to shut down the GMU and by extension the GPU */
+static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
  {
-       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
-       struct msm_gpu *gpu = &adreno_gpu->base;
-       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
         u32 val;
  
         /*
@@ -803,10 +778,13 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
         val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
  
         if (val != 0xf) {
-               int ret = a6xx_gmu_wait_for_idle(a6xx_gpu);
+               int ret = a6xx_gmu_wait_for_idle(gmu);
  
-               /* Temporary until we can recover safely */
-               BUG_ON(ret);
+               /* If the GMU isn't responding assume it is hung */
+               if (ret) {
+                       a6xx_gmu_force_off(gmu);
+                       return;
+               }
  
                 /* tell the GMU we want to slumber */
                 a6xx_gmu_notify_slumber(gmu);
@@ -838,14 +816,30 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
  
         /* Tell RPMh to power off the GPU */
         a6xx_rpmh_stop(gmu);
+}
+
+
+int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
+{
+       struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
+       struct msm_gpu *gpu = &a6xx_gpu->base.base;
+
+       /*
+        * Force the GMU off if we detected a hang, otherwise try to shut it
+        * down gracefully
+        */
+       if (gmu->hung)
+               a6xx_gmu_force_off(gmu);
+       else
+               a6xx_gmu_shutdown(gmu);
  
         /* Remove the bus vote */
         icc_set_bw(gpu->icc_path, 0, 0);
  
         /*
-        * Mark the GPU power domain as off. During the shutdown process the GMU
-        * should actually turn off the power so this is really just a
-        * houskeeping step
+        * Make sure the GX domain is off before turning off the GMU (CX)
+        * domain. Usually the GMU does this but only if the shutdown sequence
+        * was successful
          */
         if (!IS_ERR(gmu->gxpd))
                 pm_runtime_put_sync(gmu->gxpd);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h

index 078d418..c5b1887 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
@@ -27,9 +27,6 @@ struct a6xx_gmu_bo {
  /* the GMU is coming up for the first time or back from a power collapse */
  #define GMU_COLD_BOOT 1
  
-/* The GMU is being soft reset after a fault */
-#define GMU_RESET 2
-
  /*
   * These define the level of control that the GMU has - the higher the number
   * the more things that the GMU hardware controls on its own.
@@ -79,6 +76,7 @@ struct a6xx_gmu {
         struct a6xx_hfi_queue queues[2];
  
         struct tasklet_struct hfi_tasklet;
+       bool hung;
  };
  
  static inline u32 gmu_read(struct a6xx_gmu *gmu, u32 offset)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c

index fefe773..f76d8cd 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -698,7 +698,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
          * Make sure the GMU is idle before continuing (because some transitions
          * may use VBIF
          */
-       a6xx_gmu_wait_for_idle(a6xx_gpu);
+       a6xx_gmu_wait_for_idle(&a6xx_gpu->gmu);
  
         /* Clear the VBIF pipe before shutting down */
         /* FIXME: This accesses the GPU - do we need to make sure it is on? */
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h

index 528a4cf..b46279e 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -46,9 +46,8 @@ struct a6xx_gpu {
  int a6xx_gmu_resume(struct a6xx_gpu *gpu);
  int a6xx_gmu_stop(struct a6xx_gpu *gpu);
  
-int a6xx_gmu_wait_for_idle(struct a6xx_gpu *gpu);
+int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu);
  
-int a6xx_gmu_reset(struct a6xx_gpu *a6xx_gpu);
  bool a6xx_gmu_isidle(struct a6xx_gmu *gmu);
  
  int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
author	Jordan Crouse <jcrouse@codeaurora.org>
	Mon, 4 Feb 2019 16:15:42 +0000 (09:15 -0700)
committer	Rob Clark <robdclark@chromium.org>
	Fri, 19 Apr 2019 18:50:06 +0000 (11:50 -0700)
drivers/gpu/drm/msm/adreno/a6xx_gmu.c		patch | blob | history
drivers/gpu/drm/msm/adreno/a6xx_gmu.h		patch | blob | history
drivers/gpu/drm/msm/adreno/a6xx_gpu.c		patch | blob | history
drivers/gpu/drm/msm/adreno/a6xx_gpu.h		patch | blob | history