drm/msm/a6xx: Fix stale rpmh votes from GPU
author: Akhil P Oommen <quic_akhilpo@quicinc.com>
Tue, 25 Feb 2025 19:52:14 +0000 (01:22 +0530)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 25 Apr 2025 08:45:52 +0000 (10:45 +0200)
commit f561db72a663f8a73c2250bf3244ce1ce221bed7 upstream.

It was observed on sc7180 (A618 GPU) that the GPU's votes for the GX rail and
CNOC BCM nodes were not removed after GPU suspend. This was because, in some
cases, we skipped sending the 'prepare-slumber' request to the GMU during the
suspend sequence. So, make sure we always call the prepare-slumber HFI during
suspend. Also, calling prepare-slumber without a prior oob-gpu handshake
messes up the GMU firmware's internal state, so do that handshake when required.

Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Cc: stable@vger.kernel.org
Signed-off-by: Akhil P Oommen <quic_akhilpo@quicinc.com>
Patchwork: https://patchwork.freedesktop.org/patch/639569/
Signed-off-by: Rob Clark <robdclark@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/msm/adreno/a6xx_gmu.c

index 9009442b543dda23db38afe5390b4a876cf9d005..e7136b7759cb33e95fb9c9a2d9f5f27c01948dd6 100644 (file)
@@ -1042,49 +1042,50 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
        struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
        struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
        u32 val;
+       int ret;
 
        /*
-        * The GMU may still be in slumber unless the GPU started so check and
-        * skip putting it back into slumber if so
+        * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when
+        * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when
+        * required
         */
-       val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
+       if (adreno_gpu->base.needs_hw_init) {
+               if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET))
+                       goto force_off;
 
-       if (val != 0xf) {
-               int ret = a6xx_gmu_wait_for_idle(gmu);
+               a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+       }
 
-               /* If the GMU isn't responding assume it is hung */
-               if (ret) {
-                       a6xx_gmu_force_off(gmu);
-                       return;
-               }
+       ret = a6xx_gmu_wait_for_idle(gmu);
 
-               a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
+       /* If the GMU isn't responding assume it is hung */
+       if (ret)
+               goto force_off;
 
-               /* tell the GMU we want to slumber */
-               ret = a6xx_gmu_notify_slumber(gmu);
-               if (ret) {
-                       a6xx_gmu_force_off(gmu);
-                       return;
-               }
+       a6xx_bus_clear_pending_transactions(adreno_gpu, a6xx_gpu->hung);
 
-               ret = gmu_poll_timeout(gmu,
-                       REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
-                       !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
-                       100, 10000);
+       /* tell the GMU we want to slumber */
+       ret = a6xx_gmu_notify_slumber(gmu);
+       if (ret)
+               goto force_off;
 
-               /*
-                * Let the user know we failed to slumber but don't worry too
-                * much because we are powering down anyway
-                */
+       ret = gmu_poll_timeout(gmu,
+               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
+               !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
+               100, 10000);
 
-               if (ret)
-                       DRM_DEV_ERROR(gmu->dev,
-                               "Unable to slumber GMU: status = 0%x/0%x\n",
-                               gmu_read(gmu,
-                                       REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
-                               gmu_read(gmu,
-                                       REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
-       }
+       /*
+        * Let the user know we failed to slumber but don't worry too
+        * much because we are powering down anyway
+        */
+
+       if (ret)
+               DRM_DEV_ERROR(gmu->dev,
+                       "Unable to slumber GMU: status = 0%x/0%x\n",
+                       gmu_read(gmu,
+                               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
+                       gmu_read(gmu,
+                               REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
 
        /* Turn off HFI */
        a6xx_hfi_stop(gmu);
@@ -1094,6 +1095,11 @@ static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
 
        /* Tell RPMh to power off the GPU */
        a6xx_rpmh_stop(gmu);
+
+       return;
+
+force_off:
+       a6xx_gmu_force_off(gmu);
 }